Xt templating language (OCaml server) (http://chris.pacejo.net/programs/xt)

root / SAX.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
open Streams
open Unicode
open Str
open Scanf

(* String aliases used by the event type below; all of them are plain
   [utf8_string] values. *)
(* Name of an element, attribute, processing instruction or entity. *)
type name = utf8_string
(* Attribute value. *)
type value = utf8_string
(* Attributes of a tag as (name, value) pairs, in list order. *)
type attribs = (name * value) list
(* Character data carried by [Text] and [Whitespace] events. *)
type text = utf8_string

(* One unit of markup, as produced by [SimpleReader.get] and consumed by
   [SimpleWriter.put]. *)
type event =
	(* start tag: element name and its attributes *)
	| Open_tag of name * attribs
	(* end tag: element name *)
	| Close_tag of name
	(* empty-element tag (closed with a slash): name and attributes *)
	| Empty_tag of name * attribs
	(* run of character data between tags *)
	| Text of text
	(* content found between <![CDATA[ and ]]> *)
	| CDATA of utf8_string
	(* content found between <!-- and --> *)
	| Comment of utf8_string
	(* whitespace run; the writer emits it verbatim, and the reader in this
	   file currently never produces it (that branch of [get] is commented
	   out) *)
	| Whitespace of text
	(* processing instruction: target name and the content before ?> *)
	| PI of name * utf8_string
	(* content found between <!DOCTYPE and the next > *)
	| DOCTYPE of utf8_string

(* Matches one entity reference: an ampersand, one or more characters other
   than a semicolon (captured as group 1), then a semicolon. Used by [expand]. *)
let entity_re = regexp "&\\([^;]+\\);"

(* Raised by [expand] for an entity reference that is neither predefined,
   nor numeric, nor resolved by the caller-supplied mapping. Carries the
   entity name without the surrounding ampersand and semicolon. *)
exception Unknown_entity of name

(* [expand ?mapping str] returns [str] with every entity reference replaced
   by its expansion.

   Resolution order for the name found between the ampersand and the
   semicolon:
   1. the five predefined entities lt, gt, amp, apos and quot;
   2. a hexadecimal character reference (#x followed by hex digits);
   3. a decimal character reference (# followed by decimal digits);
   4. [mapping name], where [mapping] signals an unknown name by raising
      [Not_found] (the default mapping knows no names).

   A numeric reference must consist of digits only: a malformed one (no
   digits, trailing characters, or a number too large for [int]) is not
   expanded but handed to [mapping] like any other name.

   Raises [Unknown_entity name] when nothing resolves the reference. *)
let expand ?(mapping=(fun _ -> raise Not_found)) =
	(* The scanners below return [None] for anything that is not exactly a
	   character reference. Besides [Scan_failure], [sscanf] raises
	   [End_of_file] when the input ends early (a lone #, or #x without
	   digits) and [Failure] when the number does not fit an [int]; all three
	   mean that the name is not a character reference. The trailing %! makes
	   the scan fail unless the whole name has been consumed. *)
	let hex_ref ent =
		try Some (sscanf ent "#x%X%!" (fun cp -> cp))
		with Scan_failure _ | Failure _ | End_of_file -> None
	in
	let dec_ref ent =
		try Some (sscanf ent "#%u%!" (fun cp -> cp))
		with Scan_failure _ | Failure _ | End_of_file -> None
	in
	let char_ref ent =
		match hex_ref ent with
		| Some _ as found -> found
		| None -> dec_ref ent
	in
	global_substitute entity_re (fun s ->
		match matched_group 1 s with
		| "lt" -> "<"
		| "gt" -> ">"
		| "amp" -> "&"
		| "apos" -> "\'"
		| "quot" -> "\""
		| ent ->
			match char_ref ent with
			(* Encoding happens outside the scanner handlers so that whatever
			   [of_cp_list] raises for a bad code point still propagates. *)
			| Some cp -> UTF_8_String.of_cp_list [cp]
			| None ->
				try mapping ent
				with Not_found -> raise (Unknown_entity ent))

(* The three characters that [escape] rewrites. *)
let escape_re = regexp "[<&>]"

(* [escape text] returns [text] with each less-than sign, ampersand and
   greater-than sign replaced by the corresponding predefined entity
   reference. Quote characters are left alone. *)
let escape text =
	let replacement subject =
		(* [escape_re] matches exactly one character, so the matched string
		   always has length one. *)
		match (matched_string subject).[0] with
		| '&' -> "&amp;"
		| '<' -> "&lt;"
		| '>' -> "&gt;"
		| _ -> assert false
	in
	global_substitute escape_re replacement text

(* Raised by the reader when the input does not fit the markup being read.
   Carries the code point that was actually read at the point of failure. *)
exception Unexpected of code_point

(* Pull parser that turns a stream of code points with putback into [event]s.
   Each call to [get] reads exactly one event. Names, attribute values and
   text are returned as they appear in the input: nothing in this module
   calls [expand], and close tags are not matched against open tags.
   Most helpers read with [Putback.get]; what happens when the input ends in
   the middle of a construct therefore depends on [Putback.get] itself. *)
module SimpleReader(Putback: Putback with type u = code_point) = struct
	(* TODO: catch missing whitespace *)
	type t = Putback.t
	type u = event
	
	module POps = PutbackOps(Putback)
	module UTF_8_StringOps = ReaderOps(UTF_8_String.Reader)
	module UTF_8_Buffer = UTF_8.Writer(Buffer_ByteWriter)
	
	(* Reads one code point from [s] and raises [Unexpected] with the code
	   point actually read if it is not [cp]. *)
	let expect_cp s cp =
		let cp' = Putback.get s in
		if cp <> cp' then raise (Unexpected cp')
	
	(* Applies [expect_cp] to every code point of [str], in order. *)
	let expect s str = UTF_8_StringOps.iter (expect_cp s) (UTF_8_String.Reader.of_utf8_string str)
	
	(* Collects code points from [s] until one satisfies [pred]. That code
	   point is pushed back and the code points before it are returned as a
	   string. Reads with [Putback.get], so the end of input is not handled
	   here. *)
	let read_until s pred =
		let buf = Buffer.create 16 in
		(* NOTE(review): [ubuf] presumably encodes each code point as UTF-8
		   bytes into [buf]; confirm against UTF_8.Writer. *)
		let ubuf = UTF_8_Buffer.of_writer buf in
		let rec f () =
			let cp = Putback.get s in
			if pred cp then begin
				Putback.unget s cp;
				Buffer.contents buf
			end else begin
				UTF_8_Buffer.put ubuf cp;
				f ()
			end
		in f ()
	
	(* Collects code points from [s] for as long as they satisfy [pred]. The
	   first code point that fails [pred] is pushed back. Unlike [read_until],
	   this also stops, without error, when [POps.maybe_get] returns [None]. *)
	let read_while s pred =
		let buf = Buffer.create 16 in
		let ubuf = UTF_8_Buffer.of_writer buf in
		let rec f () =
			match POps.maybe_get s with
			| Some cp ->
				if pred cp then begin
					UTF_8_Buffer.put ubuf cp;
					f ()
				end else begin
					Putback.unget s cp;
					Buffer.contents buf
				end
			| None -> Buffer.contents buf
		in f ()
	
	(* [check s str cp] tells whether [str] starts at [cp], where [cp] is a
	   code point that has just been read from [s]: [cp] must equal the first
	   code point of [str] and the code points that follow in [s] must spell
	   the rest of [str]. Every code point read while looking ahead is pushed
	   back, most recently read first, so [s] is left as it was found.
	   Partially applied as [check s str], it is the stop predicate handed to
	   [read_until]. All callers in this module pass a non-empty literal. *)
	let check s str cp =
		let seq = UTF_8_String.Reader.of_utf8_string str in
		let cp' = UTF_8_String.Reader.get seq in
		cp = cp' && let rec f () =
			match UTF_8_StringOps.maybe_get seq with
			| Some cp' ->
				let cp = POps.maybe_get s in
				(* the recursive call looks further ahead before this code
				   point is pushed back *)
				let res = cp = Some cp' && f () in
				(match cp with Some cp -> Putback.unget s cp | None -> ());
				res
			| None -> true
		in f ()
	
	(* Space, tab, carriage return or line feed. *)
	let is_whitespace cp =
		cp = 0x20 || cp = 0x09 || cp = 0x0D || cp = 0x0A
	
	(* Anything that is not whitespace and not one of > ? = / counts as a
	   name character. This is far more permissive than XML: a less-than sign
	   or a quote character is accepted, for instance. *)
	let is_name_char cp =
		(* TODO: FIXME *)
		not (is_whitespace cp) && cp <> u '>' && cp <> u '?' && cp <> u '=' && cp <> u '/'
	
	(* Consumes whitespace; the first code point that is not whitespace is
	   pushed back. Reads with [Putback.get]. *)
	let rec discard_whitespace s =
		let cp = Putback.get s in
		if is_whitespace cp then discard_whitespace s
		else Putback.unget s cp
	
	(* Reads a run of whitespace as a [Whitespace] event. Nothing in this
	   module calls it at present: the branch of [get] that did is commented
	   out. *)
	let read_whitespace s =
		Whitespace (read_while s is_whitespace)
	
	(* Reads character data up to the next less-than sign, or up to the end
	   of input, as a [Text] event. *)
	let read_text s =
		Text (read_while s (fun cp -> cp <> u '<'))
	
	(* Reads a non-empty run of name characters. When the run is empty, the
	   next code point is read and reported through [Unexpected]. *)
	let read_name s =
		let name = read_while s is_name_char in
		if name = "" then raise (Unexpected (Putback.get s))
		else name
	
	(* Reads one attribute: a name, an equals sign with optional whitespace on
	   either side, then a value enclosed in single or double quotes. The
	   value is returned without its quotes. A less-than sign inside the value
	   stops the read, and the following [expect_cp] then raises [Unexpected]
	   for it. *)
	let read_attribute s =
		let name = read_name s in
		discard_whitespace s;
		expect s "=";
		discard_whitespace s;
		(* [q] is the opening quote; the same code point must close the value *)
		let q = Putback.get s in
		if q <> u '\'' && q <> u '"' then raise (Unexpected q)
		else begin
			let value = read_until s (fun cp -> cp = u '<' || cp = q) in
			expect_cp s q;
			(name, value)
		end
	
	(* Reads attributes for as long as the next code point, after optional
	   whitespace, is a name character. Returns them in input order. *)
	let rec read_attributes s =
		discard_whitespace s;
		if is_name_char (POps.peek s) then
			let nv = read_attribute s in
			(* strictness point *)
			(* Binding [nv] first forces this attribute to be read before the
			   recursive call; the evaluation order of the two operands of ::
			   is not specified by the language. *)
			nv :: read_attributes s
		else []
	
	(* Reads the remainder of a start tag or empty-element tag; the opening
	   less-than sign has already been consumed by [get]. A slash must be
	   followed directly by a greater-than sign and yields [Empty_tag]. *)
	let read_open_tag s =
		let name = read_name s in
		let atts = read_attributes s in
		let cp = Putback.get s in
		if cp = u '/' then begin
			expect s ">";
			Empty_tag (name, atts)
		end else if cp = u '>' then Open_tag (name, atts)
		else raise (Unexpected cp)
	
	(* Reads the remainder of an end tag; the less-than sign and the slash
	   have already been consumed by [get] and [read_tag]. *)
	let read_close_tag s =
		let name = read_name s in
		discard_whitespace s;
		expect s ">";
		Close_tag name
	
	(* Reads the remainder of a processing instruction; the less-than sign
	   and the question mark have already been consumed. The content runs
	   from the first code point after the target name that is not whitespace
	   up to, and excluding, the closing ?> pair. *)
	let read_pi s =
		let name = read_name s in
		discard_whitespace s;
		let pi = read_until s (check s "?>") in
		expect s "?>";
		PI (name, pi)
	
	(* Reads the remainder of a comment; [read_special] has already consumed
	   the first dash, so one more dash is expected here. The content ends at
	   the first pair of dashes, which must be followed by a greater-than
	   sign. *)
	let read_comment s =
		expect s "-";
		let comment = read_until s (check s "--") in
		expect s "-->";
		Comment comment
	
	(* Reads the remainder of a CDATA section; [read_special] has already
	   consumed the opening square bracket. The content ends at the first
	   ]]> sequence. *)
	let read_cdata s =
		expect s "CDATA[";
		let cdata = read_until s (check s "]]>") in
		expect s "]]>";
		CDATA cdata
	
	(* Reads the remainder of a document type declaration; [read_special]
	   has already consumed the letter D. The content runs up to the first
	   greater-than sign, so a declaration that contains one (an internal
	   subset, for instance) is cut short there. *)
	let read_doctype s =
		expect s "OCTYPE";
		discard_whitespace s;
		let doctype = read_until s (fun cp -> cp = u '>') in
		expect s ">";
		DOCTYPE doctype
	
	(* Dispatches on the code point that follows the less-than sign and the
	   exclamation mark: a dash starts a comment, an opening square bracket a
	   CDATA section, the letter D a document type declaration. Anything else
	   raises [Unexpected]. *)
	let read_special s =
		let cp = Putback.get s in
		if cp = u '-' then read_comment s
		else if cp = u '[' then read_cdata s
		else if cp = u 'D' then read_doctype s
		else raise (Unexpected cp)
	
	(* Dispatches on the code point that follows a less-than sign: a slash
	   starts an end tag, a question mark a processing instruction, an
	   exclamation mark one of the constructs handled by [read_special].
	   Any other code point is pushed back and read as the start of a tag
	   name. *)
	let read_tag s =
		let cp = Putback.get s in
		if cp = u '/' then read_close_tag s
		else if cp = u '?' then read_pi s
		else if cp = u '!' then read_special s
		else begin
			(* TODO: check name start char *)
			Putback.unget s cp;
			read_open_tag s
		end
	
	(* Reads the next event from [s]: markup when the next code point is a
	   less-than sign, a [Text] event otherwise. The first read uses
	   [Putback.get], so calling this at the end of input does whatever
	   [Putback.get] does there. Whitespace is not reported separately; the
	   code that did so is kept below, commented out. *)
	let get s =
		let cp = Putback.get s in
		if cp = u '<' then read_tag s
		else (*if is_whitespace cp then begin
			Putback.unget s cp;
			read_whitespace s
		end else*) begin
			Putback.unget s cp;
			read_text s
		end
	
	(* A reader is the underlying putback stream itself. *)
	let of_putback s = s
end

(* Serializer that writes [event]s as markup onto a writer of code points.
   Names, text and other content are written as given; the only escaping
   performed is that of double quotes inside attribute values. *)
module SimpleWriter(Writer: Writer with type u = code_point) = struct
	type t = Writer.t
	type u = event
	
	module UTF_8_StringOps = ReaderOps(UTF_8_String.Reader)
	
	(* Writes every code point of [str] to [s], in order. *)
	let put_string s str =
		let source = UTF_8_String.Reader.of_utf8_string str in
		UTF_8_StringOps.iter (fun cp -> Writer.put s cp) source
	
	(* Writes one attribute: a space, the name, an equals sign and the value
	   enclosed in double quotes. A double quote inside the value is written
	   as the quot entity reference; every other code point is written as
	   is. *)
	let put_attrib s (name, value) =
		let quote = u '"' in
		let put_value_cp cp =
			if cp <> quote then Writer.put s cp
			else put_string s "&quot;"
		in
		Writer.put s (u ' ');
		put_string s name;
		Writer.put s (u '=');
		Writer.put s quote;
		UTF_8_StringOps.iter put_value_cp (UTF_8_String.Reader.of_utf8_string value);
		Writer.put s quote
	
	(* Writes all attributes in list order. *)
	let put_attribs s attribs =
		List.iter (fun attrib -> put_attrib s attrib) attribs
	
	(* Writes the markup for one event to [s]. *)
	let put s ev =
		let put_char c = Writer.put s (u c) in
		(* the part shared by start tags and empty-element tags *)
		let put_tag_head name attribs =
			put_char '<';
			put_string s name;
			put_attribs s attribs
		in
		match ev with
		| Text text | Whitespace text -> put_string s text
		| Open_tag (name, attribs) ->
			put_tag_head name attribs;
			put_char '>'
		| Empty_tag (name, attribs) ->
			put_tag_head name attribs;
			put_string s " />" (* for great SGML *)
		| Close_tag name ->
			put_char '<';
			put_char '/';
			put_string s name;
			put_char '>'
		| CDATA cdata ->
			put_string s "<![CDATA[";
			put_string s cdata;
			put_string s "]]>"
		| Comment comment ->
			put_string s "<!--";
			put_string s comment;
			put_string s "-->"
		| PI (name, pi) ->
			put_string s "<?";
			put_string s name;
			(* the separating space is only written when there is content *)
			if pi <> "" then begin
				put_char ' ';
				put_string s pi
			end;
			put_string s "?>"
		| DOCTYPE doctype ->
			put_string s "<!DOCTYPE ";
			put_string s doctype;
			put_string s ">"
	
	(* Flushing is delegated to the underlying writer. *)
	let flush = Writer.flush
	
	(* An event writer is the underlying code point writer itself. *)
	let of_writer s = s
end

(* [SimpleReader] over a stream of bytes: the byte stream is first wrapped by
   [UTF_XML.Putback], which supplies the code points [SimpleReader] requires.
   NOTE(review): how [UTF_XML] chooses the encoding is not visible from here. *)
module SimpleByteReader(Putback: Putback with type u = byte) = struct
	module U = UTF_XML.Putback(Putback)
	include SimpleReader(U)
	(* Shadows the included [of_putback] so that callers pass the byte stream
	   directly. *)
	let of_putback byte_stream =
		let code_points = U.of_putback byte_stream in
		of_putback code_points
end

(* [SimpleWriter] over a writer of bytes: the byte writer is first wrapped by
   [UTF_XML.Writer], which accepts the code points [SimpleWriter] emits.
   NOTE(review): the output encoding is decided by [UTF_XML], not shown here. *)
module SimpleByteWriter(W: Writer with type u = byte) = struct
	module U = UTF_XML.Writer(W)
	include SimpleWriter(U)
	(* Shadows the included [of_writer] so that callers pass the byte writer
	   directly. *)
	let of_writer byte_writer =
		let code_point_writer = U.of_writer byte_writer in
		of_writer code_point_writer
end