Point Cloud Library (PCL) 1.12.1
opennurbs_unicode.h
1/* $NoKeywords: $ */
2/*
3//
4// Copyright (c) 1993-2012 Robert McNeel & Associates. All rights reserved.
5// OpenNURBS, Rhinoceros, and Rhino3D are registered trademarks of Robert
6// McNeel & Associates.
7//
8// THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
9// ALL IMPLIED WARRANTIES OF FITNESS FOR ANY PARTICULAR PURPOSE AND OF
10// MERCHANTABILITY ARE HEREBY DISCLAIMED.
11//
12// For complete openNURBS copyright information see <http://www.opennurbs.org>.
13//
14////////////////////////////////////////////////////////////////
15*/
16
17#if !defined(OPENNURBS_UNICODE_INC_)
18#define OPENNURBS_UNICODE_INC_
19
20ON_BEGIN_EXTERNC
21
23{
24 /*
25 If an error occurs, then bits of m_error_status are
26 set to indicate what type of error occurred.
27
28 Error types:
29 1: The input parameters were invalid.
30 This error cannot be masked.
31
32 2: The output buffer was not large enough to hold the converted
33 string. As much conversion as possible is performed in this
34 case and the error cannot be masked.
35
36 4: When parsing a UTF-8 or UTF-32 string, the values of two
37 consecutive encoding sequences formed a valid UTF-16
38 surrogate pair.
39
40 This error is masked if 0 != (4 & m_error_mask).
41 If the error is masked, then the surrogate pair is
42 decoded, the value of the resulting unicode code point
43 is used, and parsing continues.
44
45 8: An overlong UTF-8 encoding sequence was encountered and
46 the value of the overlong UTF-8 sequence was a valid unicode
47 code point.
48
49 This error is masked if 0 != (8 & m_error_mask).
50 If the error is masked, then the unicode code point is
51 used and parsing continues.
52
53 16: An illegal UTF-8 encoding sequence occurred or an invalid
54 unicode code point value resulted from decoding a
55 UTF-8 sequence.
56
57 This error is masked if 0 != (16 & m_error_mask).
58 If the error is masked and the value of m_error_code_point is
59 a valid unicode code point, then m_error_code_point is used
60 and parsing continues.
61 */
62 unsigned int m_error_status;
63
64 /*
65 If 0 != (m_error_mask & 4), then type 4 errors are masked.
66 If 0 != (m_error_mask & 8), then type 8 errors are masked.
67 If 0 != (m_error_mask & 16) and m_error_code_point is a valid unicode
68 code point value, then type 16 errors are masked.
69 */
70 unsigned int m_error_mask;
71
72 /*
73 Unicode code point value to use when masking type 16 errors.
74 If 0 == (m_error_mask & 16), then this parameter is ignored.
75 0xFFFD is a popular choice for the m_error_code_point value.
76 */
78};
79
80
81/*
82Description:
83 Test a value to determine if it is a valid unicode code point value.
84Parameters:
85 u - [in] value to test
86Returns:
87 true: u is a valid unicode code point
88 false: u is not a valid unicode code point
89Remarks:
90 Valid unicode code points are
91 (0 <= u && u <= 0xD7FF) || (0xE000 <= u && u <= 0x10FFFF)
92*/
93ON_DECL
94int ON_IsValidUnicodeCodePoint( ON__UINT32 u );
95
96/*
97Description:
98 Convert an integer to its UTF-8 form.
99Parameters:
100 u - [in]
101 Integer in the CPU's native byte order that can be
102 converted to UTF-8 form.
103 Valid values are in the interval [0,2147483647].
104 sUTF8 - [out]
105 sUTF8 is a buffer of 6 ON__UINT8 elements and the UTF-8 form
106 is returned in sUTF8[]. The returned value specifies how
107 many elements of sUTF8[] are set.
108Returns:
109 0: u is too large (>=2^31) to be encoded as a UTF-8 string.
110 No changes are made to the sUTF8[] values.
111 1: the UTF-8 form of u is 1 byte returned in sUTF8[0].
112 2: the UTF-8 form of u is 2 bytes returned in sUTF8[0],sUTF8[1].
113 3: the UTF-8 form of u is 3 bytes returned in sUTF8[0],sUTF8[1],sUTF8[2].
114 4: the UTF-8 form of u is 4 bytes returned in sUTF8[0],sUTF8[1],sUTF8[2],sUTF8[3].
115 5: the UTF-8 form of u is 5 bytes returned in sUTF8[0],sUTF8[1],sUTF8[2],sUTF8[3],sUTF8[4].
116 6: the UTF-8 form of u is 6 bytes returned in sUTF8[0],sUTF8[1],sUTF8[2],sUTF8[3],sUTF8[4],sUTF8[5].
117 For return values requiring less than 6 bytes, no changes
118 are made to the unused bytes in sUTF8[].
119Remarks:
120 Any integer in the range 0 to 2^31 - 1 can be encoded as a UTF-8 string.
121 When a unicode string is being encoded take steps to ensure that
122 u is a valid unicode code point value. The function ON_IsValidUnicodeCodePoint()
123 can be used to determine if u is a valid unicode code point value.
124*/
125ON_DECL
126int ON_EncodeUTF8( ON__UINT32 u, ON__UINT8 sUTF8[6] );
127
128/*
129Description:
130 Decode a UTF-8 encoded string to get a single unicode code point.
131Parameters:
132 sUTF8 - [in]
133 UTF-8 string to convert.
134
135 sUTF8_count - [in]
136 number of ON__UINT8 elements in sUTF8[].
137
138 e - [in/out]
139 If e is null, errors are not masked and parsing is performed
140 to the point where the first error occurs.
141 If e is not null, all errors are reported by setting the appropriate
142 e->m_error_status bits and errors are handled as described in the
143 definition of the ON_UnicodeErrorParameters struct.
144
145 unicode_code_point - [out]
146 The unicode_code_point pointer must not be null.
147 If a nonzero value is returned, then *unicode_code_point is
148 a valid unicode code point value.
149Returns:
150 Number of elements of sUTF8 that were parsed.
151 0 indicates failure.
152*/
153ON_DECL
154int ON_DecodeUTF8(
155 const ON__UINT8* sUTF8,
156 int sUTF8_count,
158 ON__UINT32* unicode_code_point
159 );
160
161/*
162Description:
163 Convert a 4 byte unicode code point value to its UTF-16 form.
164Parameters:
165 unicode_code_point - [in]
166 4 byte unicode code point value in the CPU's native byte order.
167 Valid values are in the interval [0,0xD7FF] or the
168 interval [0xE000,0x10FFFF].
169 sUTF16 - [out]
170 sUTF16 is a buffer of 2 ON__UINT16 elements. If the UTF-16 form
171 is a single value, it is returned in sUTF16[0]. If the UTF-16 form
172 is a surrogate pair, the first code unit (high surrogate)
173 is returned in sUTF16[0] and the second unit (low surrogate) is
174 returned in sUTF16[1]. The returned values are in
175 the CPU's native byte order.
176Returns:
177 0: unicode_code_point is not a valid Unicode code point. No changes are
178 made to the sUTF16[] values.
179 1: unicode_code_point is a valid Unicode code point with a UTF-16 form
180 consisting of the single value returned in sUTF16[0].
181 2: unicode_code_point is a valid Unicode code point with a UTF-16 form
182 consisting of a surrogate pair returned in sUTF16[0] and sUTF16[1].
183*/
184ON_DECL
185int ON_EncodeUTF16( ON__UINT32 unicode_code_point, ON__UINT16 sUTF16[2] );
186
187/*
188Description:
189 Decode a UTF-16 string to get a single unicode code point.
190Parameters:
191 sUTF16 - [in]
192 UTF-16 string to convert.
193
194 sUTF16_count - [in]
195 number of ON__UINT16 elements in sUTF16[].
196
197 e - [in/out]
198 If e is null, errors are not masked and parsing is performed
199 to the point where the first error occurs.
200 If e is not null, all errors are reported by setting the appropriate
201 e->m_error_status bits and errors are handled as described in the
202 definition of the ON_UnicodeErrorParameters struct.
203
204 unicode_code_point - [out]
205 The unicode_code_point pointer must not be null.
206 If a nonzero value is returned, then *unicode_code_point is
207 a valid unicode code point value in the CPU's native byte order.
208Returns:
209 Number of elements of sUTF16 that were parsed.
210 0 indicates failure.
211*/
212ON_DECL
213int ON_DecodeUTF16(
214 const ON__UINT16* sUTF16,
215 int sUTF16_count,
217 ON__UINT32* unicode_code_point
218 );
219
220/*
221Description:
222 Decode a UTF-16 encoded string whose elements have byte order
223 opposite the native CPU's to get a single unicode code point.
224Parameters:
225 sUTF16 - [in]
226 UTF-16 string to convert with byte order opposite the
227 CPU's native byte order.
228
229 sUTF16_count - [in]
230 number of ON__UINT16 elements in sUTF16[].
231
232 e - [in/out]
233 If e is null, errors are not masked and parsing is performed
234 to the point where the first error occurs.
235 If e is not null, all errors are reported by setting the appropriate
236 e->m_error_status bits and errors are handled as described in the
237 definition of the ON_UnicodeErrorParameters struct.
238
239 unicode_code_point - [out]
240 The unicode_code_point pointer must not be null.
241 If a nonzero value is returned, then *unicode_code_point is
242 a valid unicode code point value in the CPU's native byte order.
243Returns:
244 Number of elements of sUTF16 that were parsed.
245 0 indicates failure.
246*/
247ON_DECL
248int ON_DecodeSwapByteUTF16(
249 const ON__UINT16* sUTF16,
250 int sUTF16_count,
252 ON__UINT32* unicode_code_point
253 );
254
255/*
256Description:
257 Convert a unicode string from a UTF-8 encoded ON__UINT8 array
258 into a UTF-16 encoded ON__UINT16 array.
259
260Parameters:
261 sUTF8 - [in]
262 UTF-8 string to convert.
263
264 sUTF8_count - [in]
265 If sUTF8_count >= 0, then it specifies the number of
266 ON__UINT8 elements in sUTF8[] to convert.
267
268 If sUTF8_count == -1, then sUTF8 must be a null terminated
269 string and all the elements up to the first null element are
270 converted.
271
272 sUTF16 - [out]
273 If sUTF16 is not null and sUTF16_count > 0, then the UTF-16
274 encoded string is returned in this buffer. If there is room
275 for the null terminator, the converted string will be null
276 terminated. The null terminator is never included in the count
277 returned by this function. The converted string is in the
278 CPU's native byte order. No byte order mark is prepended.
279
280 sUTF16_count - [in]
281 If sUTF16_count > 0, then it specifies the number of available
282 ON__UINT16 elements in the sUTF16[] buffer.
283
284 If sUTF16_count == 0, then the sUTF16 parameter is ignored.
285
286 error_status - [out]
287 If error_status is not null, then bits of *error_status are
288 set to indicate the success or failure of the conversion.
289 When the error_mask parameter is used to mask some
290 conversion errors, multiple bits may be set.
291 0: Successful conversion with no errors.
292 1: Invalid input parameters. This error cannot be masked.
293 2: The sUTF16 output buffer was not large enough to hold
294 the converted string. This error cannot be masked.
295 4: The values of two UTF-8 encoding sequences formed a valid
296 UTF-16 surrogate pair. This error can be masked. If the
297 error is masked, then the surrogate pair is added
298 to the UTF-16 output string and parsing continues.
299 8: An overlong UTF-8 encoding sequence was encountered.
300 The value of the overlong sequence was a valid unicode
301 code point. This error can be masked. If the error is masked,
302 then the unicode code point is encoded and added to the
303 UTF-16 output string and parsing continues.
304 16: An illegal UTF-8 encoding sequence occurred or an invalid
305 unicode code point value resulted from decoding a
306 UTF-8 sequence. This error can be masked. If the error is
307 masked and error_code_point is a valid unicode code point,
308 then its UTF-16 encoding is added to the UTF-16 output
309 string and parsing continues.
310
311 error_mask - [in]
312 If 0 != (error_mask & 4), then type 4 errors are masked.
313 If 0 != (error_mask & 8), then type 8 errors are masked.
314 If 0 != (error_mask & 16) and error_code_point is a valid unicode
315 code point value, then type 16 errors are masked.
316
317 error_code_point - [in]
318 Unicode code point value to use when masking type 16 errors.
319 If 0 == (error_mask & 16), then this parameter is ignored.
320 0xFFFD is a popular choice for the error_code_point value.
321
322 sNextUTF8 - [out]
323 If sNextUTF8 is not null, then *sNextUTF8 points to the first
324 element in the input sUTF8[] buffer that was not converted.
325
326 If an error occurs and is not masked, then *sNextUTF8 points to
327 the element of sUTF8[] where the conversion failed. If no errors
328 occur or all errors are masked, then *sNextUTF8 points to
329 sUTF8 + sUTF8_count.
330
331Returns:
332 If sUTF16_count > 0, the return value is the number of ON__UINT16
333 elements written to sUTF16[]. When the return value < sUTF16_count,
334 a null terminator is written to sUTF16[return value].
335
336 If sUTF16_count == 0, the return value is the minimum number of
337 ON__UINT16 elements that are needed to hold the converted string.
338 The return value does not include room for a null terminator.
339 Increment the return value by one if you want to have an element
340 to use for a null terminator.
341*/
342ON_DECL
343int ON_ConvertUTF8ToUTF16(
344 const ON__UINT8* sUTF8,
345 int sUTF8_count,
346 ON__UINT16* sUTF16,
347 int sUTF16_count,
348 unsigned int* error_status,
349 unsigned int error_mask,
350 ON__UINT32 error_code_point,
351 const ON__UINT8** sNextUTF8
352 );
353
354/*
355Description:
356 Convert a unicode string from a UTF-8 encoded ON__UINT8 array
357 into a UTF-32 encoded ON__UINT32 array.
358
359Parameters:
360 sUTF8 - [in]
361 UTF-8 string to convert.
362
363 sUTF8_count - [in]
364 If sUTF8_count >= 0, then it specifies the number of
365 ON__UINT8 elements in sUTF8[] to convert.
366
367 If sUTF8_count == -1, then sUTF8 must be a null terminated
368 string and all the elements up to the first null element are
369 converted.
370
371 sUTF32 - [out]
372 If sUTF32 is not null and sUTF32_count > 0, then the UTF-32
373 encoded string is returned in this buffer. If there is room
374 for the null terminator, the converted string will be null
375 terminated. The null terminator is never included in the count
376 returned by this function. The converted string is in the
377 CPU's native byte order. No byte order mark is prepended.
378
379 sUTF32_count - [in]
380 If sUTF32_count > 0, then it specifies the number of available
381 ON__UINT32 elements in the sUTF32[] buffer.
382
383 If sUTF32_count == 0, then the sUTF32 parameter is ignored.
384
385 error_status - [out]
386 If error_status is not null, then bits of *error_status are
387 set to indicate the success or failure of the conversion.
388 When the error_mask parameter is used to mask some
389 conversion errors, multiple bits may be set.
390 0: Successful conversion with no errors.
391 1: Invalid input parameters. This error cannot be masked.
392 2: The sUTF32 output buffer was not large enough to hold
393 the converted string. This error cannot be masked.
394 4: The values of two UTF-8 encoding sequences formed a valid
395 UTF-16 surrogate pair. This error can be masked. If the
396 error is masked, then the surrogate pair is decoded,
397 the code point value is added to the UTF-32 output
398 string and parsing continues.
399 8: An overlong UTF-8 encoding sequence was encountered.
400 The value of the overlong sequence was a valid unicode
401 code point. This error can be masked. If the error is masked,
402 then the unicode code point is added to the UTF-32
403 output string and parsing continues.
404 16: An illegal UTF-8 encoding sequence occurred or an invalid
405 unicode code point value resulted from decoding a
406 UTF-8 sequence. This error can be masked. If the error is
407 masked and error_code_point is a valid unicode code point,
408 then its value is added to the UTF-32 output string and
409 parsing continues.
410
411 error_mask - [in]
412 If 0 != (error_mask & 4), then type 4 errors are masked.
413 If 0 != (error_mask & 8), then type 8 errors are masked.
414 If 0 != (error_mask & 16) and error_code_point is a valid unicode
415 code point value, then type 16 errors are masked.
416
417 error_code_point - [in]
418 Unicode code point value to use when masking type 16 errors.
419 If 0 == (error_mask & 16), then this parameter is ignored.
420 0xFFFD is a popular choice for the error_code_point value.
421
422 sNextUTF8 - [out]
423 If sNextUTF8 is not null, then *sNextUTF8 points to the first
424 element in the input sUTF8[] buffer that was not converted.
425
426 If an error occurs and is not masked, then *sNextUTF8 points to
427 the element of sUTF8[] where the conversion failed. If no errors
428 occur or all errors are masked, then *sNextUTF8 points to
429 sUTF8 + sUTF8_count.
430
431Returns:
432 If sUTF32_count > 0, the return value is the number of ON__UINT32
433 elements written to sUTF32[]. When the return value < sUTF32_count,
434 a null terminator is written to sUTF32[return value].
435
436 If sUTF32_count == 0, the return value is the minimum number of
437 ON__UINT32 elements that are needed to hold the converted string.
438 The return value does not include room for a null terminator.
439 Increment the return value by one if you want to have an element
440 to use for a null terminator.
441*/
442ON_DECL
443int ON_ConvertUTF8ToUTF32(
444 const ON__UINT8* sUTF8,
445 int sUTF8_count,
446 ON__UINT32* sUTF32,
447 int sUTF32_count,
448 unsigned int* error_status,
449 unsigned int error_mask,
450 ON__UINT32 error_code_point,
451 const ON__UINT8** sNextUTF8
452 );
453
454/*
455Description:
456 Convert a unicode string from a UTF-16 encoded ON__UINT16 array
457 into a UTF-8 encoded ON__UINT8 array.
458
459Parameters:
460 bTestByteOrder - [in]
461 If bTestByteOrder is true and the first element of sUTF16[]
462 is 0xFEFF, then this element is ignored.
463
464 If bTestByteOrder is true and the first element of sUTF16[]
465 is 0xFFFE, then this element is ignored and the subsequent
466 elements of sUTF16[] have their bytes swapped before the
467 conversion is calculated.
468
469 In all other cases the first element of sUTF16[] is
470 converted and no byte swapping is performed.
471
472 sUTF16 - [in]
473 UTF-16 string to convert.
474
475 If bTestByteOrder is true and the first element of sUTF16[]
476 is 0xFEFF, then this element is skipped and it is assumed
477 that sUTF16[] is in the CPU's native byte order.
478
479 If bTestByteOrder is true and the first element of sUTF16[]
480 is 0xFFFE, then this element is skipped and it is assumed
481 that sUTF16[] is not in the CPU's native byte order and bytes
482 are swapped before characters are converted.
483
484 If bTestByteOrder is false or the first character of sUTF16[]
485 is neither 0xFEFF nor 0xFFFE, then the sUTF16 string must match
486 the CPU's byte order.
487
488 sUTF16_count - [in]
489 If sUTF16_count >= 0, then it specifies the number of
490 ON__UINT16 elements in sUTF16[] to convert.
491
492 If sUTF16_count == -1, then sUTF16 must be a null terminated
493 string and all the elements up to the first null element are
494 converted.
495
496 sUTF8 - [out]
497 If sUTF8 is not null and sUTF8_count > 0, then the UTF-8
498 encoded string is returned in this buffer. If there is room
499 for the null terminator, the converted string will be null
500 terminated. The null terminator is never included in the count
501 returned by this function. The converted string is in the
502 CPU's native byte order. No byte order mark is prepended.
503
504 sUTF8_count - [in]
505 If sUTF8_count > 0, then it specifies the number of available
506 ON__UINT8 elements in the sUTF8[] buffer.
507
508 If sUTF8_count == 0, then the sUTF8 parameter is ignored.
509
510 error_status - [out]
511 If error_status is not null, then bits of *error_status are
512 set to indicate the success or failure of the conversion.
513 When the error_mask parameter is used to mask some
514 conversion errors, multiple bits may be set.
515 0: Successful conversion with no errors.
516 1: Invalid input parameters. This error cannot be masked.
517 2: The sUTF8 output buffer was not large enough to hold
518 the converted string. This error cannot be masked.
519 16: An illegal UTF-16 encoding sequence occurred or an invalid
520 unicode code point value resulted from decoding a
521 UTF-16 sequence. This error can be masked. If the error is
522 masked and error_code_point is a valid unicode code point,
523 then its UTF-8 encoding is added to the UTF-8 output
524 string and parsing continues.
525
526 error_mask - [in]
527 If 0 != (error_mask & 16) and error_code_point is a valid unicode
528 code point value, then type 16 errors are masked.
529
530 error_code_point - [in]
531 Unicode code point value to use when masking type 16 errors.
532 If 0 == (error_mask & 16), then this parameter is ignored.
533 0xFFFD is a popular choice for the error_code_point value.
534
535 sNextUTF16 - [out]
536 If sNextUTF16 is not null, then *sNextUTF16 points to the first
537 element in the input sUTF16[] buffer that was not converted.
538
539 If an error occurs and is not masked, then *sNextUTF16 points to
540 the element of sUTF16[] where the conversion failed. If no errors
541 occur or all errors are masked, then *sNextUTF16 points to
542 sUTF16 + sUTF16_count.
543Returns:
544 If sUTF8_count > 0, the return value is the number of ON__UINT8
545 elements written to sUTF8[]. When the return value < sUTF8_count,
546 a null terminator is written to sUTF8[return value].
547
548 If sUTF8_count == 0, the return value is the minimum number of
549 ON__UINT8 elements that are needed to hold the converted string.
550 The return value does not include room for a null terminator.
551 Increment the return value by one if you want to have an element
552 to use for a null terminator.
553*/
554ON_DECL
555int ON_ConvertUTF16ToUTF8(
556 int bTestByteOrder,
557 const ON__UINT16* sUTF16,
558 int sUTF16_count,
559 ON__UINT8* sUTF8,
560 int sUTF8_count,
561 unsigned int* error_status,
562 unsigned int error_mask,
563 ON__UINT32 error_code_point,
564 const ON__UINT16** sNextUTF16
565 );
566
567/*
568Description:
569 Convert a unicode string from a UTF-16 encoded ON__UINT16 array
570 into a UTF-32 encoded ON__UINT32 array.
571
572Parameters:
573 bTestByteOrder - [in]
574 If bTestByteOrder is true and the first element of sUTF16[]
575 is 0xFEFF, then this element is ignored.
576
577 If bTestByteOrder is true and the first element of sUTF16[]
578 is 0xFFFE, then this element is ignored and the subsequent
579 elements of sUTF16[] have their bytes swapped before the
580 conversion is calculated.
581
582 In all other cases the first element of sUTF16[] is
583 converted and no byte swapping is performed.
584
585 sUTF16 - [in]
586 UTF-16 string to convert.
587
588 If bTestByteOrder is true and the first element of sUTF16[]
589 is 0xFEFF, then this element is skipped and it is assumed
590 that sUTF16[] is in the CPU's native byte order.
591
592 If bTestByteOrder is true and the first element of sUTF16[]
593 is 0xFFFE, then this element is skipped and it is assumed
594 that sUTF16[] is not in the CPU's native byte order and bytes
595 are swapped before characters are converted.
596
597 If bTestByteOrder is false or the first character of sUTF16[]
598 is neither 0xFEFF nor 0xFFFE, then the sUTF16 string must match
599 the CPU's byte order.
600
601 sUTF16_count - [in]
602 If sUTF16_count >= 0, then it specifies the number of
603 ON__UINT16 elements in sUTF16[] to convert.
604
605 If sUTF16_count == -1, then sUTF16 must be a null terminated
606 string and all the elements up to the first null element are
607 converted.
608
609 sUTF32 - [out]
610 If sUTF32 is not null and sUTF32_count > 0, then the UTF-32
611 encoded string is returned in this buffer. If there is room
612 for the null terminator, the converted string will be null
613 terminated. The null terminator is never included in the count
614 returned by this function. The converted string is in the
615 CPU's native byte order. No byte order mark is prepended.
616
617 sUTF32_count - [in]
618 If sUTF32_count > 0, then it specifies the number of available
619 ON__UINT32 elements in the sUTF32[] buffer.
620
621 If sUTF32_count == 0, then the sUTF32 parameter is ignored.
622
623 error_status - [out]
624 If error_status is not null, then bits of *error_status are
625 set to indicate the success or failure of the conversion.
626 When the error_mask parameter is used to mask some
627 conversion errors, multiple bits may be set.
628 0: Successful conversion with no errors.
629 1: Invalid input parameters. This error cannot be masked.
630 2: The sUTF32 output buffer was not large enough to hold
631 the converted string. This error cannot be masked.
632 16: An illegal UTF-16 encoding sequence occurred or an invalid
633 unicode code point value resulted from decoding a
634 UTF-16 sequence. This error can be masked. If the error is
635 masked and error_code_point is a valid unicode code point,
636 then its value is added to the UTF-32 output string and
637 parsing continues.
638
639 error_mask - [in]
640 If 0 != (error_mask & 16) and error_code_point is a valid unicode
641 code point value, then type 16 errors are masked.
642
643 error_code_point - [in]
644 Unicode code point value to use when masking type 16 errors.
645 If 0 == (error_mask & 16), then this parameter is ignored.
646 0xFFFD is a popular choice for the error_code_point value.
647
648 sNextUTF16 - [out]
649 If sNextUTF16 is not null, then *sNextUTF16 points to the first
650 element in the input sUTF16[] buffer that was not converted.
651
652 If an error occurs and is not masked, then *sNextUTF16 points to
653 the element of sUTF16[] where the conversion failed. If no errors
654 occur or all errors are masked, then *sNextUTF16 points to
655 sUTF16 + sUTF16_count.
656
657Returns:
658 If sUTF32_count > 0, the return value is the number of ON__UINT32
659 elements written to sUTF32[]. When the return value < sUTF32_count,
660 a null terminator is written to sUTF32[return value].
661
662 If sUTF32_count == 0, the return value is the minimum number of
663 ON__UINT32 elements that are needed to hold the converted string.
664 The return value does not include room for a null terminator.
665 Increment the return value by one if you want to have an element
666 to use for a null terminator.
667*/
668ON_DECL
669int ON_ConvertUTF16ToUTF32(
670 int bTestByteOrder,
671 const ON__UINT16* sUTF16,
672 int sUTF16_count,
673 unsigned int* sUTF32,
674 int sUTF32_count,
675 unsigned int* error_status,
676 unsigned int error_mask,
677 ON__UINT32 error_code_point,
678 const ON__UINT16** sNextUTF16
679 );
680
681/*
682Description:
683 Convert a unicode string from a UTF-32 encoded ON__UINT32 array
684 into a UTF-8 encoded ON__UINT8 array.
685
686Parameters:
687 bTestByteOrder - [in]
688 If bTestByteOrder is true and the first element of sUTF32[]
689 is 0x0000FEFF, then this element is ignored.
690
691 If bTestByteOrder is true and the first element of sUTF32[]
692 is 0xFFFE0000, then this element is ignored and the subsequent
693 elements of sUTF32[] have their bytes swapped before the
694 conversion is calculated.
695
696 In all other cases the first element of sUTF32[] is
697 converted and no byte swapping is performed.
698
699 sUTF32 - [in]
700 UTF-32 string to convert.
701
702 If bTestByteOrder is true and the first element of sUTF32[]
703 is 0x0000FEFF, then this element is skipped and it is assumed
704 that sUTF32[] is in the CPU's native byte order.
705
706 If bTestByteOrder is true and the first element of sUTF32[]
707 is 0xFFFE0000, then this element is skipped and it is assumed
708 that sUTF32[] is not in the CPU's native byte order and bytes
709 are swapped before characters are converted.
710
711 If bTestByteOrder is false or the first character of sUTF32[]
712 is neither 0x0000FEFF nor 0xFFFE0000, then the sUTF32 string
713 must match the CPU's byte order.
714
715 sUTF32_count - [in]
716 If sUTF32_count >= 0, then it specifies the number of
717 ON__UINT32 elements in sUTF32[] to convert.
718
719 If sUTF32_count == -1, then sUTF32 must be a null terminated
720 string and all the elements up to the first null element are
721 converted.
722
723 sUTF8 - [out]
724 If sUTF8 is not null and sUTF8_count > 0, then the UTF-8
725 encoded string is returned in this buffer. If there is room
726 for the null terminator, the converted string will be null
727 terminated. The null terminator is never included in the count
728 returned by this function. The converted string is in the
729 CPU's native byte order. No byte order mark is prepended.
730
731 sUTF8_count - [in]
732 If sUTF8_count > 0, then it specifies the number of available
733 ON__UINT8 elements in the sUTF8[] buffer.
734
735 If sUTF8_count == 0, then the sUTF8 parameter is ignored.
736
737 error_status - [out]
738 If error_status is not null, then bits of *error_status are
739 set to indicate the success or failure of the conversion.
740 When the error_mask parameter is used to mask some
741 conversion errors, multiple bits may be set.
742 0: Successful conversion with no errors.
743 1: Invalid input parameters. This error cannot be masked.
744 2: The sUTF8 output buffer was not large enough to hold
745 the converted string. This error cannot be masked.
746 4: The values of two UTF-32 elements form a valid
747 UTF-16 surrogate pair. This error can be masked. If the
748 error is masked, then the surrogate pair is converted
749 to a valid unicode code point, its UTF-8 encoding is
750 added to the UTF-8 output string and parsing continues.
751 16: An invalid unicode code point occurred in sUTF32[].
752 This error can be masked. If the error is masked and
753 error_code_point is a valid unicode code point,
754 then its UTF-8 encoding is added to the UTF-8 output
755 string and parsing continues.
756
757 error_mask - [in]
758 If 0 != (error_mask & 4), then type 4 errors are masked.
759 If 0 != (error_mask & 16) and error_code_point is a valid unicode
760 code point value, then type 16 errors are masked.
761
762 error_code_point - [in]
763 Unicode code point value to use when masking type 16 errors.
764 If 0 == (error_mask & 16), then this parameter is ignored.
765 0xFFFD is a popular choice for the error_code_point value.
766
767 sNextUTF32 - [out]
768 If sNextUTF32 is not null, then *sNextUTF32 points to the first
769 element in the input sUTF32[] buffer that was not converted.
770
771 If an error occurs and is not masked, then *sNextUTF32 points to
772 the element of sUTF32[] where the conversion failed. If no errors
773 occur or all errors are masked, then *sNextUTF32 points to
774 sUTF32 + sUTF32_count.
775
776Returns:
777 If sUTF8_count > 0, the return value is the number of ON__UINT8
778 elements written to sUTF8[]. When the return value < sUTF8_count,
779 a null terminator is written to sUTF8[return value].
780
781 If sUTF8_count == 0, the return value is the minimum number of
782 ON__UINT8 elements that are needed to hold the converted string.
783 The return value does not include room for a null terminator.
784 Increment the return value by one if you want to have an element
785 to use for a null terminator.
786*/
787ON_DECL
788int ON_ConvertUTF32ToUTF8(
789 int bTestByteOrder,
790 const ON__UINT32* sUTF32,
791 int sUTF32_count,
792 ON__UINT8* sUTF8,
793 int sUTF8_count,
794 unsigned int* error_status,
795 unsigned int error_mask,
796 ON__UINT32 error_code_point,
797 const ON__UINT32** sNextUTF32
798 );
799
800/*
801Description:
802 Convert a unicode string from a UTF-32 encoded ON__UINT32 array
803 into a UTF-16 encoded ON__UINT16 array.
804
805Parameters:
806 bTestByteOrder - [in]
807 If bTestByteOrder is true and the first element of sUTF32[]
808 is 0x0000FEFF, then this element is ignored.
809
810 If bTestByteOrder is true and the first element of sUTF32[]
811 is 0xFFFE0000, then this element is ignored and the subsequent
812 elements of sUTF32[] have their bytes swapped before the
813 conversion is calculated.
814
815 In all other cases the first element of sUTF32[] is
816 converted and no byte swapping is performed.
817
818 sUTF32 - [in]
819 UTF-32 string to convert.
820
821 If bTestByteOrder is true and the first element of sUTF32[]
822 is 0x0000FEFF, then this element is skipped and it is assumed
823 that sUTF32[] is in the CPU's native byte order.
824
825 If bTestByteOrder is true and the first element of sUTF32[]
826 is 0xFFFE0000, then this element is skipped and it is assumed
827 that sUTF32[] is not in the CPU's native byte order and bytes
828 are swapped before characters are converted.
829
830 If bTestByteOrder is false or the first character of sUTF32[]
831 is neither 0x0000FEFF nor 0xFFFE0000, then the sUTF32 string
832 must match the CPU's byte order.
833
834 sUTF32_count - [in]
835 If sUTF32_count >= 0, then it specifies the number of
836 ON__UINT32 elements in sUTF32[] to convert.
837
838 If sUTF32_count == -1, then sUTF32 must be a null terminated
839 string and all the elements up to the first null element are
840 converted.
841
842 sUTF16 - [out]
843 If sUTF16 is not null and sUTF16_count > 0, then the UTF-16
844 encoded string is returned in this buffer. If there is room
845 for the null terminator, the converted string will be null
846 terminated. The null terminator is never included in the count
847 returned by this function. The converted string is in the
848 CPU's native byte order. No byte order mark is prepended.
849
850 sUTF16_count - [in]
851 If sUTF16_count > 0, then it specifies the number of available
852 ON__UINT16 elements in the sUTF16[] buffer.
853
854 If sUTF16_count == 0, then the sUTF16 parameter is ignored.
855
856 error_status - [out]
857 If error_status is not null, then bits of *error_status are
858 set to indicate the success or failure of the conversion.
859 When the error_mask parameter is used to mask some
860 conversion errors, multiple bits may be set.
861 0: Successful conversion with no errors.
862 1: Invalid input parameters. This error cannot be masked.
863 2: The sUTF16 output buffer was not large enough to hold
864 the converted string. This error cannot be masked.
865 4: The values of two UTF-32 elements form a valid
866 UTF-16 surrogate pair. This error can be masked. If the
867 error is masked, then the surrogate pair is added to
868 the UTF-16 output string and parsing continues.
869 16: An invalid unicode code point occurred in sUTF32[].
870 This error can be masked. If the error is masked and
871 error_code_point is a valid unicode code point,
872 then its UTF-16 encoding is added to the UTF-16 output
873 string and parsing continues.
874
875 error_mask - [in]
876 If 0 != (error_mask & 4), then type 4 errors are masked.
877 If 0 != (error_mask & 16) and error_code_point is a valid unicode
878 code point value, then type 16 errors are masked.
879
880 error_code_point - [in]
881 Unicode code point value to use when masking type 16 errors.
882 If 0 == (error_mask & 16), then this parameter is ignored.
883 0xFFFD is a popular choice for the error_code_point value.
884
885 sNextUTF32 - [out]
886 If sNextUTF32 is not null, then *sNextUTF32 points to the first
887 element in the input sUTF32[] buffer that was not converted.
888
889 If an error occurs and is not masked, then *sNextUTF32 points to
890 the element of sUTF32[] where the conversion failed; for an invalid
891 code point error, that element holds the illegal value.
892 If an error does not occur, then (*sNextUTF32 - sUTF32)
893 is the number of values converted.
894
895Returns:
896 If sUTF16_count > 0, the return value is the number of ON__UINT16
897 elements written to sUTF16[]. When the return value < sUTF16_count,
898 a null terminator is written to sUTF16[return value].
899
900 If sUTF16_count == 0, the return value is the minimum number of
901 ON__UINT16 elements that are needed to hold the converted string.
902 The return value does not include room for a null terminator.
903 Increment the return value by one if you want to have an element
904 to use for a null terminator.
905*/
906ON_DECL /* UTF-32 -> UTF-16 conversion; output is in native byte order with no byte order mark */
907int ON_ConvertUTF32ToUTF16( /* returns elements written to sUTF16[], or the required count when sUTF16_count == 0 */
908 int bTestByteOrder, /* in: true = a leading 0x0000FEFF / 0xFFFE0000 element is skipped (the latter also swaps bytes) */
909 const ON__UINT32* sUTF32, /* in: UTF-32 string to convert */
910 int sUTF32_count, /* in: element count, or -1 for a null terminated string */
911 ON__UINT16* sUTF16, /* out: UTF-16 result; null terminated if there is room */
912 int sUTF16_count, /* in: capacity of sUTF16[]; 0 = size query, sUTF16 is ignored */
913 unsigned int* error_status, /* out, may be null: bit flags 1, 2, 4, 16; 0 means no errors */
914 unsigned int error_mask, /* in: bits 4 and 16 mask the matching error types */
915 ON__UINT32 error_code_point, /* in: substitute for type 16 errors; ignored unless (error_mask & 16) */
916 const ON__UINT32** sNextUTF32 /* out, may be null: first element of sUTF32[] that was not converted */
917 );
918
919/*
920Description:
921 Convert a wchar_t string using the native platform's most common
922 encoding into a unicode string encoded as a UTF-8 char array.
923
924 If 2 = sizeof(wchar_t), then the wchar_t array is assumed to be
925 a UTF-16 encoded string. This is the case with current versions
926 of Microsoft Windows.
927
928 If 4 = sizeof(wchar_t), then the wchar_t array is assumed to be
929 a UTF-32 encoded string. This is the case with current versions
930 of Apple OSX.
931
932Parameters:
933 bTestByteOrder - [in]
934 If bTestByteOrder is true and the first element of sWideChar[]
935 is 0xFEFF, then this element is ignored.
936
937 If bTestByteOrder is true and the first element of sWideChar[]
938 is 0xFFFE, then this element is ignored and the subsequent
939 elements of sWideChar[] have their bytes swapped before the
940 conversion is calculated.
941
942 In all other cases the first element of sWideChar[] is
943 converted and no byte swapping is performed.
944
945 sWideChar - [in]
946 wchar_t string to convert.
947
948 If bTestByteOrder is true and the first element of sWideChar[]
949 is 0xFEFF, then this element is skipped and it is assumed
950 that sWideChar[] is in the CPU's native byte order.
951
952 If bTestByteOrder is true and the first element of sWideChar[]
953 is 0xFFFE, then this element is skipped and it is assumed
954 that sWideChar[] is not in the CPU's native byte order and bytes
955 are swapped before characters are converted.
956
957 If bTestByteOrder is false or the first character of sWideChar[]
958 is neither 0xFEFF nor 0xFFFE, then the sWideChar string must match
959 the CPU's byte order.
960
961 sWideChar_count - [in]
962 If sWideChar_count >= 0, then it specifies the number of
963 wchar_t elements in sWideChar[] to convert.
964
965 If sWideChar_count == -1, then sWideChar must be a null terminated
966 string and all the elements up to the first null element are
967 converted.
968
969 sUTF8 - [out]
970 If sUTF8 is not null and sUTF8_count > 0, then the UTF-8
971 encoded string is returned in this buffer. If there is room
972 for the null terminator, the converted string will be null
973 terminated. The null terminator is never included in the count
974 returned by this function. The converted string is in the
975 CPU's native byte order. No byte order mark is prepended.
976
977 sUTF8_count - [in]
978 If sUTF8_count > 0, then it specifies the number of available
979 ON__UINT8 elements in the sUTF8[] buffer.
980
981 If sUTF8_count == 0, then the sUTF8 parameter is ignored.
982
983 error_status - [out]
984 If error_status is not null, then bits of *error_status are
985 set to indicate the success or failure of the conversion.
986 When the error_mask parameter is used to mask some
987 conversion errors, multiple bits may be set.
988 0: Successful conversion with no errors.
989 1: Invalid input parameters. This error cannot be masked.
990 2: The sUTF8 output buffer was not large enough to hold
991 the converted string. This error cannot be masked.
992 16: An illegal wchar_t encoding sequence occurred or an invalid
993 unicode code point value resulted from decoding a
994 wchar_t sequence. This error can be masked. If the error is
995 masked and error_code_point is a valid unicode code point,
996 then its UTF-8 encoding is added to the UTF-8 output
997 string and parsing continues.
998
999 error_mask - [in]
1000 If 0 != (error_mask & 16) and error_code_point is a valid unicode
1001 code point value, then type 16 errors are masked.
1002
1003 error_code_point - [in]
1004 Unicode code point value to use when masking type 16 errors.
1005 If 0 == (error_mask & 16), then this parameter is ignored.
1006 0xFFFD is a popular choice for the error_code_point value.
1007
1008 sNextWideChar - [out]
1009 If sNextWideChar is not null, then *sNextWideChar points to the first
1010 element in the input sWideChar[] buffer that was not converted.
1011
1012 If an error occurs and is not masked, then *sNextWideChar points to
1013 the element of sWideChar[] where the conversion failed. If no errors
1014 occur or all errors are masked, then *sNextWideChar points to
1015 sWideChar + sWideChar_count.
1016Returns:
1017 If sUTF8_count > 0, the return value is the number of ON__UINT8
1018 elements written to sUTF8[]. When the return value < sUTF8_count,
1019 a null terminator is written to sUTF8[return value].
1020
1021 If sUTF8_count == 0, the return value is the minimum number of
1022 ON__UINT8 elements that are needed to hold the converted string.
1023 The return value does not include room for a null terminator.
1024 Increment the return value by one if you want to have an element
1025 to use for a null terminator.
1026*/
1027ON_DECL /* wchar_t -> UTF-8 conversion; input is treated as UTF-16 or UTF-32 according to sizeof(wchar_t) */
1028int ON_ConvertWideCharToUTF8( /* returns elements written to sUTF8[], or the required count when sUTF8_count == 0 */
1029 int bTestByteOrder, /* in: true = a leading 0xFEFF / 0xFFFE element is skipped (the latter also swaps bytes) */
1030 const wchar_t* sWideChar, /* in: wchar_t string to convert */
1031 int sWideChar_count, /* in: element count, or -1 for a null terminated string */
1032 char* sUTF8, /* out: UTF-8 result; null terminated if there is room */
1033 int sUTF8_count, /* in: capacity of sUTF8[]; 0 = size query, sUTF8 is ignored */
1034 unsigned int* error_status, /* out, may be null: bit flags 1, 2, 16; 0 means no errors */
1035 unsigned int error_mask, /* in: bit 16 masks type 16 errors when error_code_point is valid */
1036 ON__UINT32 error_code_point, /* in: substitute for type 16 errors; ignored unless (error_mask & 16) */
1037 const wchar_t** sNextWideChar /* out, may be null: first element of sWideChar[] that was not converted */
1038 );
1039
1040/*
1041Description:
1042 Convert a UTF-8 encoded char string to wchar_t string using
1043 the native platform's most common encoding.
1044
1045 If 2 = sizeof(wchar_t), then UTF-16 encoding is used for the
1046 output string. This is the case with current versions of
1047 Microsoft Windows.
1048
1049 If 4 = sizeof(wchar_t), then UTF-32 encoding is used for the
1050 output string. This is the case with current versions of
1051 Apple OSX.
1052
1053Parameters:
1054 sUTF8 - [in]
1055 UTF-8 string to convert.
1056
1057 sUTF8_count - [in]
1058 If sUTF8_count >= 0, then it specifies the number of
1059 ON__UINT8 elements in sUTF8[] to convert.
1060
1061 If sUTF8_count == -1, then sUTF8 must be a null terminated
1062 string and all the elements up to the first null element are
1063 converted.
1064
1065 sWideChar - [out]
1066 If sWideChar is not null and sWideChar_count > 0, then the
1067 output string is returned in this buffer. If there is room
1068 for the null terminator, the converted string will be null
1069 terminated. The null terminator is never included in the count
1070 returned by this function. The converted string is in the
1071 CPU's native byte order. No byte order mark is prepended.
1072
1073 sWideChar_count - [in]
1074 If sWideChar_count > 0, then it specifies the number of available
1075 wchar_t elements in the sWideChar[] buffer.
1076
1077 If sWideChar_count == 0, then the sWideChar parameter is ignored.
1078
1079 error_status - [out]
1080 If error_status is not null, then bits of *error_status are
1081 set to indicate the success or failure of the conversion.
1082 When the error_mask parameter is used to mask some
1083 conversion errors, multiple bits may be set.
1084 0: Successful conversion with no errors.
1085 1: Invalid input parameters. This error cannot be masked.
1086 2: The sWideChar output buffer was not large enough to hold
1087 the converted string. This error cannot be masked.
1088 4: The values of two UTF-8 encoding sequences formed a valid
1089 UTF-16 surrogate pair. This error can be masked. If the
1090 error is masked, then the surrogate pair's code point is encoded
1091 and added to the output string and parsing continues.
1092 8: An overlong UTF-8 encoding sequence was encountered.
1093 The value of the overlong sequence was a valid unicode
1094 code point. This error can be masked. If the error is masked,
1095 then the unicode code point is encoded and added to the
1096 output string and parsing continues.
1097 16: An illegal UTF-8 encoding sequence occurred or an invalid
1098 unicode code point value resulted from decoding a
1099 UTF-8 sequence. This error can be masked. If the error is
1100 masked and error_code_point is a valid unicode code point,
1101 then its encoding is added to the output string and parsing
1102 continues.
1103
1104 error_mask - [in]
1105 If 0 != (error_mask & 4), then type 4 errors are masked.
1106 If 0 != (error_mask & 8), then type 8 errors are masked.
1107 If 0 != (error_mask & 16) and error_code_point is a valid unicode
1108 code point value, then type 16 errors are masked.
1109
1110 error_code_point - [in]
1111 Unicode code point value to use when masking type 16 errors.
1112 If 0 == (error_mask & 16), then this parameter is ignored.
1113 0xFFFD is a popular choice for the error_code_point value.
1114
1115 sNextUTF8 - [out]
1116 If sNextUTF8 is not null, then *sNextUTF8 points to the first
1117 element in the input sUTF8[] buffer that was not converted.
1118
1119 If an error occurs and is not masked, then *sNextUTF8 points to
1120 the element of sUTF8[] where the conversion failed. If no errors
1121 occur or all errors are masked, then *sNextUTF8 points to
1122 sUTF8 + sUTF8_count.
1123
1124Returns:
1125 If sWideChar_count > 0, the return value is the number of wchar_t
1126 elements written to sWideChar[]. When the return value < sWideChar_count,
1127 a null terminator is written to sWideChar[return value].
1128
1129 If sWideChar_count == 0, the return value is the minimum number of
1130 wchar_t elements that are needed to hold the converted string.
1131 The return value does not include room for a null terminator.
1132 Increment the return value by one if you want to have an element
1133 to use for a null terminator.
1134*/
1135ON_DECL /* UTF-8 -> wchar_t conversion; output is UTF-16 or UTF-32 according to sizeof(wchar_t) */
1136int ON_ConvertUTF8ToWideChar( /* returns elements written to sWideChar[], or the required count when sWideChar_count == 0 */
1137 const char* sUTF8, /* in: UTF-8 string to convert */
1138 int sUTF8_count, /* in: element count, or -1 for a null terminated string */
1139 wchar_t* sWideChar, /* out: converted string in native byte order; null terminated if there is room */
1140 int sWideChar_count, /* in: capacity of sWideChar[]; 0 = size query, sWideChar is ignored */
1141 unsigned int* error_status, /* out, may be null: bit flags 1, 2, 4, 8, 16; 0 means no errors */
1142 unsigned int error_mask, /* in: bits 4, 8 and 16 mask the matching error types */
1143 ON__UINT32 error_code_point, /* in: substitute for type 16 errors; ignored unless (error_mask & 16) */
1144 const char** sNextUTF8 /* out, may be null: first element of sUTF8[] that was not converted */
1145 );
1146
1147ON_END_EXTERNC
1148
1149#endif