-(* $Id$
- * ----------------------------------------------------------------------
- * PXP: The polymorphic XML parser for Objective Caml.
- * Copyright by Gerd Stolpmann. See LICENSE for details.
- *)
-
-open Pxp_types
-open Pxp_lexer_types
-open Pxp_lexers
-open Pxp_entity
-open Pxp_aux
-open Pxp_dfa
-
-(**********************************************************************)
-
-class dtd the_warner init_encoding =
- object (self)
- val mutable root = (None : string option)
- val mutable id = (None : dtd_id option)
-
- val warner = (the_warner : collect_warnings)
- val encoding = init_encoding
- val lexerset = Pxp_lexers.get_lexer_set init_encoding
-
- val elements = (Hashtbl.create 100 : (string,dtd_element) Hashtbl.t)
- val gen_entities = (Hashtbl.create 100 : (string,entity * bool) Hashtbl.t)
- val par_entities = (Hashtbl.create 100 : (string,entity) Hashtbl.t)
- val notations = (Hashtbl.create 100 : (string,dtd_notation) Hashtbl.t)
- val pinstr = (Hashtbl.create 100 : (string,proc_instruction) Hashtbl.t)
- val mutable element_names = []
- val mutable gen_entity_names = []
- val mutable par_entity_names = []
- val mutable notation_names = []
- val mutable pinstr_names = []
-
- val mutable allow_arbitrary = false
- val mutable standalone_declaration = false
-
- val mutable validated = false
-
- initializer
- let w = new drop_warnings in
- self # add_gen_entity
- (new internal_entity self "lt" w "<" false false false encoding)
- false;
- self # add_gen_entity
- (new internal_entity self "gt" w ">" false false false encoding)
- false;
- self # add_gen_entity
- (new internal_entity self "amp" w "&" false false false encoding)
- false;
- self # add_gen_entity
- (new internal_entity self "apos" w "'" false false false encoding)
- false;
- self # add_gen_entity
- (new internal_entity self "quot" w """ false false false encoding)
- false;
-
-
- method encoding = encoding
-
- method warner = warner
-
- method set_root r =
- if root = None then
- root <- Some r
- else
- assert false
-
-
- method set_id j =
- if id = None then
- id <- Some j
- else
- assert false
-
-
- method standalone_declaration = standalone_declaration
-
- method set_standalone_declaration b =
- standalone_declaration <- b
-
- method allow_arbitrary =
- allow_arbitrary <- true
-
- method disallow_arbitrary =
- allow_arbitrary <- false
-
- method arbitrary_allowed = allow_arbitrary
-
- method root = root
- method id = id
-
-
- method add_element el =
- (* raises Not_found if 'el' has already been added *)
- (* Note: 'el' is encoded in the same way as 'self'! *)
- let name = el # name in
- check_name warner name;
- if Hashtbl.mem elements name then
- raise Not_found;
- Hashtbl.add elements name el;
- element_names <- name :: element_names;
- validated <- false
-
-
- method add_gen_entity en extdecl =
- (* The following is commented out; perhaps there should be an option
- * to reactivate it on demand
- *)
- (* raises Validation_error if the predefines entities 'lt', 'gt', 'amp',
- * 'quot', and 'apos' are redeclared with an improper value.
- *)
- if en # encoding <> encoding then
- failwith "Pxp_dtd.dtd # add_gen_entity: Inconsistent encodings";
- let name = en # name in
- check_name warner name;
- if Hashtbl.mem gen_entities name then begin
- if List.mem name [ "lt"; "gt"; "amp"; "quot"; "apos" ] then begin
- (* These are allowed to be declared several times *)
- let (rt,_) = en # replacement_text in
- let toks = tokens_of_content_string lexerset rt in
- try
- begin match toks with
- [CRef 60] -> if name <> "lt" then raise Not_found
- | [CharData ">"] -> if name <> "gt" then raise Not_found
- | [CRef 62] -> if name <> "gt" then raise Not_found
- | [CRef 38] -> if name <> "amp" then raise Not_found
- | [CharData "'"] -> if name <> "apos" then raise Not_found
- | [CRef 39] -> if name <> "apos" then raise Not_found
- | [CharData "\""] -> if name <> "quot" then raise Not_found
- | [CRef 34] -> if name <> "quot" then raise Not_found
- | _ -> raise Not_found
- end
- with
- Not_found ->
- raise (Validation_error("Predefined entity `" ^ name ^
- "' redeclared"))
- end
- else
- warner # warn ("Entity `" ^ name ^ "' declared twice")
- end
- else begin
- Hashtbl.add gen_entities name (en, extdecl);
- gen_entity_names <- name :: gen_entity_names
- end
-
-
- method add_par_entity en =
- if en # encoding <> encoding then
- failwith "Pxp_dtd.dtd # add_par_entity: Inconsistent encodings";
- let name = en # name in
- check_name warner name;
- if not (Hashtbl.mem par_entities name) then begin
- Hashtbl.add par_entities name en;
- par_entity_names <- name :: par_entity_names
- end
- else
- warner # warn ("Entity `" ^ name ^ "' declared twice")
-
-
- method add_notation no =
- (* raises Validation_error if 'no' already added *)
- if no # encoding <> encoding then
- failwith "Pxp_dtd.dtd # add_notation: Inconsistent encodings";
- let name = no # name in
- check_name warner name;
- if Hashtbl.mem notations name then
- raise (Validation_error("Notation `" ^ name ^ "' declared twice"));
- Hashtbl.add notations name no;
- notation_names <- name :: notation_names
-
-
- method add_pinstr pi =
- if pi # encoding <> encoding then
- failwith "Pxp_dtd.dtd # add_pinstr: Inconsistent encodings";
- let name = pi # target in
- check_name warner name;
-
- if String.length name >= 4 && String.sub name 0 4 = "pxp:" then begin
- match name with
- "pxp:dtd" ->
- let _, optname, atts = pi # parse_pxp_option in
- begin match optname with
- "optional-element-and-notation-declarations" ->
- self # allow_arbitrary
- | "optional-attribute-declarations" ->
- let lexers = Pxp_lexers.get_lexer_set encoding in
- let el_string =
- try List.assoc "elements" atts
- with Not_found ->
- raise(Error("Missing `elements' attribute for pxp:dtd"))
- in
- let el = split_attribute_value lexers el_string in
- List.iter
- (fun e_name ->
- let e =
- try Hashtbl.find elements e_name
- with
- Not_found ->
- raise(Error("Reference to unknown element `" ^
- e_name ^ "'"))
- in
- e # allow_arbitrary
- )
- el
- | _ ->
- raise(Error("Unknown PXP option `" ^
- optname ^ "'"))
- end
- | _ ->
- raise(Error("The processing instruction target `" ^
- name ^ "' is not defined by this PXP version"))
- end
- else begin
- (*----------------------------------------------------------------------
- * SUPPORT FOR DEPRECATED PI OPTIONS:
- * - <?xml:allow_undeclared_elements_and_notations?>
- * is now <?pxp:dtd optional-element-and-notation-declarations?>
- * - <?xml:allow_undeclared_attributes <elementname>?>
- * is now <?pxp:dtd optional-attribute-declarations
- * elements='<elementname> ...'?>
- * Please update your DTDs! Alternatively, you may uncommment the
- * following piece of code.
- *)
-(* if name = "xml:allow_undeclared_elements_and_notations" then *)
-(* self # allow_arbitrary; *)
-(* if name = "xml:allow_undeclared_attributes" then begin *)
-(* let v = pi # value in *)
-(* let e = *)
-(* try *)
-(* Hashtbl.find elements v *)
-(* with *)
-(* Not_found -> *)
-(* raise(Validation_error("Reference to undeclared element `"*)
-(* ^ v ^ "'")) *)
-(* in *)
-(* e # allow_arbitrary; *)
-(* end; *)
- (*----------------------------------------------------------------------
- *)
- ()
- end;
- Hashtbl.add pinstr name pi;
- pinstr_names <- name :: pinstr_names;
-
-
- method element name =
- (* returns the element 'name' or raises Validation_error if not found *)
- try
- Hashtbl.find elements name
- with
- Not_found ->
- if allow_arbitrary then
- raise Undeclared
- else
- raise(Validation_error("Reference to undeclared element `" ^ name ^ "'"))
-
- method element_names =
- (* returns the list of all names of element declarations *)
- element_names
-
-
- method gen_entity name =
- (* returns the entity 'name' or raises WF_error if not found *)
- try
- Hashtbl.find gen_entities name
- with
- Not_found ->
- raise(WF_error("Reference to undeclared general entity `" ^ name ^ "'"))
-
-
- method gen_entity_names = gen_entity_names
-
-
- method par_entity name =
- (* returns the entity 'name' or raises WF_error if not found *)
- try
- Hashtbl.find par_entities name
- with
- Not_found ->
- raise(WF_error("Reference to undeclared parameter entity `" ^ name ^ "'"))
-
-
- method par_entity_names = par_entity_names
-
-
- method notation name =
- (* returns the notation 'name' or raises Validation_error if not found *)
- try
- Hashtbl.find notations name
- with
- Not_found ->
- if allow_arbitrary then
- raise Undeclared
- else
- raise(Validation_error("Reference to undeclared notation `" ^ name ^ "'"))
-
-
- method notation_names = notation_names
-
-
- method pinstr name =
- (* returns the list of all processing instructions contained in the DTD
- * with target 'name'
- *)
- Hashtbl.find_all pinstr name
-
-
- method pinstr_names = pinstr_names
-
- method write os enc doctype =
- let wms =
- write_markup_string ~from_enc:encoding ~to_enc:enc os in
-
- let write_sysid s =
- if String.contains s '"' then
- wms ("'" ^ s ^ "'")
- else
- wms ("\"" ^ s ^ "\"");
- in
-
- if doctype then begin
- wms "<!DOCTYPE ";
- ( match root with
- None -> failwith "#write: DTD without root";
- | Some r -> wms r
- );
- wms " [\n";
- end;
-
- (* Notations: *)
- List.iter
- (fun name ->
- let notation =
- try Hashtbl.find notations name with Not_found -> assert false in
- notation # write os enc)
- (List.sort compare notation_names);
-
- (* Unparsed entities: *)
- List.iter
- (fun name ->
- let ent,_ =
- try Hashtbl.find gen_entities name with Not_found -> assert false
- in
- if ent # is_ndata then begin
- let xid = ent # ext_id in
- let notation = ent # notation in
- wms ("<!ENTITY " ^ name ^ " " );
- ( match xid with
- System s ->
- wms "SYSTEM ";
- write_sysid s;
- | Public (p,s) ->
- wms "PUBLIC ";
- write_sysid p;
- if (s <> "") then begin
- wms " ";
- write_sysid s;
- end;
- | Anonymous ->
- failwith "#write: External ID Anonymous cannot be represented"
- );
- wms (" NDATA " ^ notation ^ ">\n");
- end
- )
- (List.sort compare gen_entity_names);
-
- (* Elements: *)
- List.iter
- (fun name ->
- let element =
- try Hashtbl.find elements name with Not_found -> assert false in
- element # write os enc)
- (List.sort compare element_names);
-
- (* Processing instructions: *)
- List.iter
- (fun name ->
- let pi =
- try Hashtbl.find pinstr name with Not_found -> assert false in
- pi # write os enc)
- (List.sort compare pinstr_names);
-
- if doctype then
- wms "]>\n";
-
- method write_compact_as_latin1 os doctype =
- self # write os `Enc_iso88591 doctype
-
-
-
- (************************************************************)
- (* VALIDATION *)
- (************************************************************)
-
- method only_deterministic_models =
- Hashtbl.iter
- (fun n el ->
- let cm = el # content_model in
- match cm with
- Regexp _ ->
- if el # content_dfa = None then
- raise(Validation_error("The content model of element `" ^
- n ^ "' is not deterministic"))
- | _ ->
- ()
- )
- elements;
-
-
- method validate =
- if validated or allow_arbitrary then
- ()
- else begin
- (* Validity constraint: Notations in NDATA entity declarations must
- * be declared
- *)
- List.iter
- (fun name ->
- let ent,_ =
- try Hashtbl.find gen_entities name with Not_found -> assert false
- in
- if ent # is_ndata then begin
- let xid = ent # ext_id in
- let notation = ent # notation in
- try
- ignore(self # notation notation)
- (* Raises Validation_error if the constraint is violated *)
- with
- Undeclared -> ()
- end
- )
- gen_entity_names;
-
- (* Validate the elements: *)
- Hashtbl.iter
- (fun n el ->
- el # validate)
- elements;
-
- (* Check the root element: *)
- (* TODO: Check if this piece of code is executed at all! *)
- begin match root with
- None -> ()
- | Some r ->
- begin try
- let _ = Hashtbl.find elements r in ()
- with
- Not_found ->
- raise(Validation_error("The root element is not declared"))
- end
- end;
- validated <- true;
- end
-
- method invalidate =
- validated <- false
-
- (************************************************************)
-
- end
-
-
-(**********************************************************************)
-
-and dtd_element the_dtd the_name =
- object (self)
- val dtd = (the_dtd : dtd)
- val name = the_name
- val lexerset = Pxp_lexers.get_lexer_set (the_dtd # encoding)
- val mutable content_model = Unspecified
- val mutable content_model_validated = false
- val mutable content_dfa = lazy None
-
- val mutable externally_declared = false
-
- val mutable attributes =
- ([] : (string * ((att_type * att_default) * bool)) list)
- val mutable attributes_validated = false
-
- val mutable id_att_name = None
- val mutable idref_att_names = []
-
- val mutable allow_arbitrary = false
-
- method name = name
-
- method set_cm_and_extdecl m extdecl =
- if content_model = Unspecified then begin
- content_model <- m;
- content_model_validated <- false;
- content_dfa <- lazy (self # compute_content_dfa);
- externally_declared <- extdecl;
- dtd # invalidate
- end
- else
- raise(Validation_error("Element `" ^ name ^ "' has already a content model"))
-
- method content_model = content_model
-
- method content_dfa = Lazy.force content_dfa
-
- method private compute_content_dfa =
- match content_model with
- Regexp re ->
- ( try Some (dfa_of_regexp_content_model re)
- with Not_found -> None
- )
- | _ ->
- None
-
- method externally_declared = externally_declared
-
- method encoding = dtd # encoding
-
- method allow_arbitrary =
- allow_arbitrary <- true
-
- method disallow_arbitrary =
- allow_arbitrary <- false
-
- method arbitrary_allowed = allow_arbitrary
-
- method add_attribute aname t d extdecl =
- if aname <> "xml:lang" & aname <> "xml:space" then
- check_name (dtd#warner) aname;
- if List.mem_assoc aname attributes then
- dtd # warner # warn ("More than one declaration for attribute `" ^
- aname ^ "' of element type `" ^ name ^ "'")
- else begin
- begin match aname with
- "xml:space" ->
- begin match t with
- A_enum l ->
- let l' = Sort.list ( <= ) l in
- if l' <> [ "default"; "preserve" ] then
- raise(Validation_error("Declaration of attribute `xml:space' does not conform to XML specification"))
- | _ ->
- raise(Validation_error("Declaration of attribute `xml:space' does not conform to XML specification"))
- end
- | _ -> ()
- end;
- begin match t with
- A_id ->
- id_att_name <- Some aname;
- | (A_idref | A_idrefs) ->
- idref_att_names <- aname :: idref_att_names
- | _ ->
- ()
- end;
- attributes <- (aname, ((t,d),extdecl)) :: attributes;
- attributes_validated <- false;
- dtd # invalidate;
- end
-
- method attribute attname =
- try
- fst (List.assoc attname attributes)
- with
- Not_found ->
- if allow_arbitrary then
- raise Undeclared
- else
- raise(Validation_error("Attribute `" ^ attname ^ "' of element `"
- ^ name ^ "' not declared"))
-
- method attribute_violates_standalone_declaration attname v =
- try
- let (atype, adefault), extdecl = List.assoc attname attributes in
- extdecl &&
- ( match v with
- None ->
- adefault <> D_required && adefault <> D_implied
- (* i.e. adefault matches D_default or D_fixed *)
- | Some s ->
- atype <> A_cdata &&
- normalization_changes_value lexerset atype s
- )
- with
- Not_found ->
- if allow_arbitrary then
- raise Undeclared
- else
- raise(Validation_error("Attribute `" ^ attname ^ "' of element `"
- ^ name ^ "' not declared"))
-
-
- method attribute_names =
- List.map fst attributes
-
- method names_of_required_attributes =
- List.flatten
- (List.map
- (fun (n,((t,d),_)) ->
- if d = D_required then
- [n]
- else
- [])
- attributes)
-
- method id_attribute_name = id_att_name
-
- method idref_attribute_names = idref_att_names
-
-
- method write os enc =
- let encoding = self # encoding in
- let wms =
- write_markup_string ~from_enc:encoding ~to_enc:enc os in
-
- let rec write_contentspec cs =
- match cs with
- Unspecified ->
- failwith "#write: Unspecified content model found"
- | Empty ->
- wms "EMPTY"
- | Any ->
- wms "ANY"
- | Mixed ml ->
- wms "(";
- write_mixedspec_list ml;
- wms ")*";
- | Regexp re ->
- write_children re false
-
- and write_mixedspec_list ml =
- match ml with
- MPCDATA :: ml' ->
- wms "#PCDATA";
- if ml' <> [] then wms "|";
- write_mixedspec_list ml';
- | MChild s :: ml' ->
- wms s;
- if ml' <> [] then wms "|";
- write_mixedspec_list ml';
- | [] ->
- ()
-
- and write_children re cp =
- match re with
- Optional re' ->
- let p = needs_parens re' in
- if p then wms "(";
- write_children re' cp;
- if p then wms ")";
- wms "?";
- | Repeated re' ->
- let p = needs_parens re' in
- if p then wms "(";
- write_children re' cp;
- if p then wms ")";
- wms "*";
- | Repeated1 re' ->
- let p = needs_parens re' in
- if p then wms "(";
- write_children re' cp;
- if p then wms ")";
- wms "+";
- | Alt re' ->
- wms "(";
- ( match re' with
- re1' :: rer' ->
- write_children re1' true;
- List.iter
- (fun ren' ->
- wms "|";
- write_children ren' true;
- )
- rer';
- | [] ->
- failwith "#write: Illegal content model"
- );
- wms ")";
- | Seq re' ->
- wms "(";
- ( match re' with
- re1' :: rer' ->
- write_children re1' true;
- List.iter
- (fun ren' ->
- wms ",";
- write_children ren' true;
- )
- rer';
- | [] ->
- failwith "#write: Illegal content model"
- );
- wms ")";
- | Child ch ->
- if not cp then wms "(";
- wms ch;
- if not cp then wms ")";
-
- and needs_parens re =
- match re with
- (Optional _ | Repeated _ | Repeated1 _ ) -> true
- | _ -> false
- in
-
- wms ("<!ELEMENT " ^ name ^ " ");
- write_contentspec content_model;
- wms ">\n";
-
- wms ("<!ATTLIST " ^ name);
- List.iter
- (fun (n,((t,d),_)) ->
- wms ("\n " ^ n);
- ( match t with
- A_cdata -> wms " CDATA";
- | A_id -> wms " ID";
- | A_idref -> wms " IDREF";
- | A_idrefs -> wms " IDREFS";
- | A_entity -> wms " ENTITY";
- | A_entities -> wms " ENTITIES";
- | A_nmtoken -> wms " NMTOKEN";
- | A_nmtokens -> wms " NMTOKENS";
- | A_notation nl ->
- wms " NOTATION (";
- ( match nl with
- nl1:: nl' ->
- wms nl1;
- List.iter
- (fun n ->
- wms ("|" ^ n);
- )
- nl'
- | [] ->
- failwith "#write: Illegal content model";
- );
- wms ")";
- | A_enum el ->
- wms " (";
- ( match el with
- el1:: el' ->
- wms el1;
- List.iter
- (fun e ->
- wms ("|" ^ e);
- )
- el'
- | [] ->
- failwith "#write: Illegal content model";
- );
- wms ")";
- );
- ( match d with
- D_required -> wms " #REQUIRED"
- | D_implied -> wms " #IMPLIED"
- | D_default s ->
- wms " \"";
- write_data_string ~from_enc:encoding ~to_enc:enc os s;
- wms "\"";
- | D_fixed s ->
- wms " FIXED \"";
- write_data_string ~from_enc:encoding ~to_enc:enc os s;
- wms "\"";
- );
- )
- attributes;
-
- wms ">\n";
-
- method write_compact_as_latin1 os =
- self # write os `Enc_iso88591
-
- (************************************************************)
- (* VALIDATION *)
- (************************************************************)
-
- method validate =
- self # validate_attributes();
- self # validate_content_model()
-
- method private validate_attributes() =
- if attributes_validated then
- ()
- else begin
- (* Validity Constraint: One ID per Element Type *)
- let n = count (fun (n,((t,d),_)) -> t = A_id) attributes in
- if n > 1 then
- raise(Validation_error("More than one ID attribute for element `" ^ name ^ "'"));
- (* Validity Constraint: ID Attribute Default *)
- if List.exists
- (fun (n,((t,d),_)) ->
- t = A_id & (d <> D_required & d <> D_implied))
- attributes
- then
- raise(Validation_error("ID attribute must be #IMPLIED or #REQUIRED; element `" ^ name ^ "'"));
- (* Validity Constraint: One Notation per Element Type *)
- let n = count (fun (n,((t,d),_)) ->
- match t with A_notation _ -> true | _ -> false)
- attributes in
- if n > 1 then
- raise(Validation_error("More than one NOTATION attribute for element `" ^ name ^ "'"));
- (* Validity Constraint: Notation Attributes [second part] *)
- List.iter
- (fun (n,((t,d),_)) ->
- match t with
- A_notation l ->
- List.iter
- (fun nname ->
- let _ = dtd # notation nname in ())
- l
- | _ -> ())
- attributes;
- (* Validity Constraint: Attribute Default Legal *)
- List.iter
- (fun (n,((t,d),_)) ->
-
- let check v =
- let lexical_error() =
- lazy (raise(Validation_error("Default value for attribute `" ^ n ^ "' is lexically malformed"))) in
- check_attribute_value_lexically lexerset (lexical_error()) t v;
- begin match t with
- (A_entity|A_entities) ->
- List.iter
- (fun nd ->
- let en, extdecl = dtd # gen_entity nd in
- if not (en # is_ndata) then
- raise(Validation_error("Attribute default value must be the name of an NDATA entity; attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
-(* if dtd # standalone_declaration && extdecl then
- raise(Validation_error("Attribute default value violates the standalone declaration; attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
--- This is checked anyway when the attribute value is normalized
-*)
- )
- (split_attribute_value lexerset v)
- | A_notation nl ->
- if not (List.mem v nl) then
- raise(Validation_error("Illegal default value for attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
- | A_enum nl ->
- if not (List.mem v nl) then
- raise(Validation_error("Illegal default value for attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
- | _ -> ()
- end
- in
-
- match d with
- D_required -> ()
- | D_implied -> ()
- | D_default v -> check v
- | D_fixed v -> check v
- )
- attributes;
-
- (* Ok: This element declaration is valid *)
- attributes_validated <- true;
-
- end
-
- method private validate_content_model () =
- (* checks:
- * - Validity Constraint: No Duplicate Types
- * It is not an error if there is a child in the declaration for which
- * no element declaration is provided.
- *)
- match content_model with
- Unspecified ->
- dtd # warner # warn ("Element type `" ^ name ^ "' mentioned but not declared");
- ()
- | Empty -> ()
- | Any -> ()
- | Mixed (pcdata :: l) ->
- (* MPCDATA is always the first element by construction *)
- assert (pcdata = MPCDATA);
- if check_dups l then
- raise (Validation_error("Double children in declaration for element `" ^ name ^ "'"))
- | Regexp _ -> ()
- | _ -> assert false
-
-
-
- (************************************************************)
-
- end
-
-and dtd_notation the_name the_xid init_encoding =
-object (self)
- val name = the_name
- val xid = (the_xid : ext_id)
- val encoding = (init_encoding : Pxp_types.rep_encoding)
- method name = name
- method ext_id = xid
- method encoding = encoding
-
- method write os enc =
- let wms =
- write_markup_string ~from_enc:encoding ~to_enc:enc os in
-
- let write_sysid s =
- if String.contains s '"' then
- wms ("'" ^ s ^ "'")
- else
- wms ("\"" ^ s ^ "\"");
- in
-
- wms ("<!NOTATION " ^ name ^ " ");
- ( match xid with
- System s ->
- wms "SYSTEM ";
- write_sysid s;
- | Public (p,s) ->
- wms "PUBLIC ";
- write_sysid p;
- if (s <> "") then begin
- wms " ";
- write_sysid s;
- end;
- | Anonymous ->
- failwith "#write: External ID Anonymous cannot be represented"
- );
- wms ">\n";
-
- method write_compact_as_latin1 os =
- self # write os `Enc_iso88591
-
- end
-
-and proc_instruction the_target the_value init_encoding =
-object (self)
- val target = the_target
- val value = (the_value : string)
- val encoding = (init_encoding : Pxp_types.rep_encoding)
-
- initializer
- match target with
- ("xml"|"xmL"|"xMl"|"xML"|"Xml"|"XmL"|"XMl"|"XML") ->
- (* This is an error, not a warning, because I do not have a
- * "warner" object by hand.
- *)
- raise(WF_error("Reserved processing instruction"))
- | _ -> ()
-
- method target = target
- method value = value
- method encoding = encoding
-
- method write os enc =
- let wms =
- write_markup_string ~from_enc:encoding ~to_enc:enc os in
-
- wms "<?";
- wms target;
- wms " ";
- wms value;
- wms "?>";
-
- method write_compact_as_latin1 os =
- self # write os `Enc_iso88591
-
- method parse_pxp_option =
- let lexers = get_lexer_set encoding in
- try
- let toks = tokens_of_xml_pi lexers value in (* may raise WF_error *)
- begin match toks with
- (Pro_name option_name) :: toks' ->
- let atts = decode_xml_pi toks' in (* may raise WF_error *)
- (target, option_name, atts)
- | _ ->
- raise(Error("Bad PXP processing instruction"))
- end
- with
- WF_error _ ->
- raise(Error("Bad PXP processing instruction"))
-
- end
-;;
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.10 2000/08/18 21:18:45 gerd
- * Updated wrong comments for methods par_entity and gen_entity.
- * These can raise WF_error and not Validation_error, and this is the
- * correct behaviour.
- *
- * Revision 1.9 2000/07/25 00:30:01 gerd
- * Added support for pxp:dtd PI options.
- *
- * Revision 1.8 2000/07/23 02:16:34 gerd
- * Support for DFAs.
- *
- * Revision 1.7 2000/07/16 17:50:01 gerd
- * Fixes in 'write'
- *
- * Revision 1.6 2000/07/16 16:34:41 gerd
- * New method 'write', the successor of 'write_compact_as_latin1'.
- *
- * Revision 1.5 2000/07/14 13:56:48 gerd
- * Added methods id_attribute_name and idref_attribute_names.
- *
- * Revision 1.4 2000/07/09 00:13:37 gerd
- * Added methods gen_entity_names, par_entity_names.
- *
- * Revision 1.3 2000/07/04 22:10:55 gerd
- * Update: collect_warnings -> drop_warnings.
- * Update: Case ext_id = Anonymous.
- *
- * Revision 1.2 2000/06/14 22:19:06 gerd
- * Added checks such that it is impossible to mix encodings.
- *
- * Revision 1.1 2000/05/29 23:48:38 gerd
- * Changed module names:
- * Markup_aux into Pxp_aux
- * Markup_codewriter into Pxp_codewriter
- * Markup_document into Pxp_document
- * Markup_dtd into Pxp_dtd
- * Markup_entity into Pxp_entity
- * Markup_lexer_types into Pxp_lexer_types
- * Markup_reader into Pxp_reader
- * Markup_types into Pxp_types
- * Markup_yacc into Pxp_yacc
- * See directory "compatibility" for (almost) compatible wrappers emulating
- * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
- *
- * ======================================================================
- *
- * Revision 1.18 2000/05/28 17:24:55 gerd
- * Bugfixes.
- *
- * Revision 1.17 2000/05/27 19:21:25 gerd
- * Implemented the changes of rev. 1.10 of markup_dtd.mli.
- *
- * Revision 1.16 2000/05/20 20:31:40 gerd
- * Big change: Added support for various encodings of the
- * internal representation.
- *
- * Revision 1.15 2000/05/14 21:50:07 gerd
- * Updated: change in internal_entity.
- *
- * Revision 1.14 2000/05/06 23:08:46 gerd
- * It is possible to allow undeclared attributes.
- *
- * Revision 1.13 2000/05/01 20:42:46 gerd
- * New method write_compact_as_latin1.
- *
- * Revision 1.12 2000/05/01 15:16:57 gerd
- * The errors "undeclared parameter/general entities" are
- * well-formedness errors, not validation errors.
- *
- * Revision 1.11 2000/03/11 22:58:15 gerd
- * Updated to support Markup_codewriter.
- *
- * Revision 1.10 2000/01/20 20:53:47 gerd
- * Changed such that it runs with Markup_entity's new interface.
- *
- * Revision 1.9 1999/11/09 22:15:41 gerd
- * Added method "arbitrary_allowed".
- *
- * Revision 1.8 1999/09/01 22:52:22 gerd
- * If 'allow_arbitrary' is in effect, no validation happens anymore.
- *
- * Revision 1.7 1999/09/01 16:21:24 gerd
- * Added several warnings.
- * The attribute type of "xml:space" is now strictly checked.
- *
- * Revision 1.6 1999/08/15 20:34:21 gerd
- * Improved error messages.
- * Bugfix: It is no longer allowed to create processing instructions
- * with target "xml".
- *
- * Revision 1.5 1999/08/15 02:20:16 gerd
- * New feature: a DTD can allow arbitrary elements.
- *
- * Revision 1.4 1999/08/15 00:21:39 gerd
- * Comments have been updated.
- *
- * Revision 1.3 1999/08/14 22:12:52 gerd
- * Several functions have now a "warner" as argument which is
- * an object with a "warn" method. This is used to warn about characters
- * that cannot be represented in the Latin 1 alphabet.
- * Bugfix: if two general entities with the same name are definied,
- * the first counts, not the second.
- *
- * Revision 1.2 1999/08/11 14:56:35 gerd
- * Declaration of the predfined entities {lt,gt,amp,quot,apos}
- * is no longer forbidden; but the original definition cannot be overriddden.
- * TODO: If these entities are redeclared with problematic values,
- * the user should be warned.
- *
- * Revision 1.1 1999/08/10 00:35:51 gerd
- * Initial revision.
- *
- *
- *)