ICU 68.2  68.2
Data Structures | Namespaces | Macros | Typedefs | Enumerations | Functions
uidna.h File Reference

C API: Internationalizing Domain Names in Applications (IDNA) More...

#include "unicode/utypes.h"
#include <stdbool.h>
#include "unicode/parseerr.h"
#include "unicode/localpointer.h"

Go to the source code of this file.

Data Structures

struct  UIDNAInfo
 Output container for IDNA processing errors. More...
 

Namespaces

 icu
 File coll.h.
 

Macros

#define UIDNA_INFO_INITIALIZER
 Static initializer for a UIDNAInfo struct. More...
 

Typedefs

typedef struct UIDNA UIDNA
 C typedef for struct UIDNA. More...
 
typedef struct UIDNAInfo UIDNAInfo
 Output container for IDNA processing errors. More...
 

Enumerations

enum  {
  UMSGPAT_ARG_NAME_NOT_NUMBER =-1, UMSGPAT_ARG_NAME_NOT_VALID =-2, U_PARSE_CONTEXT_LEN = 16, UIDNA_DEFAULT =0,
  UIDNA_ALLOW_UNASSIGNED =1, UIDNA_USE_STD3_RULES =2, UIDNA_CHECK_BIDI =4, UIDNA_CHECK_CONTEXTJ =8,
  UIDNA_NONTRANSITIONAL_TO_ASCII =0x10, UIDNA_NONTRANSITIONAL_TO_UNICODE =0x20, UIDNA_CHECK_CONTEXTO =0x40, UITER_UNKNOWN_INDEX =-2,
  UNORM_UNICODE_3_2 =0x20, USET_IGNORE_SPACE = 1, USET_CASE_INSENSITIVE = 2, USET_ADD_CASE_MAPPINGS = 4,
  UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1, UTEXT_PROVIDER_STABLE_CHUNKS = 2, UTEXT_PROVIDER_WRITABLE = 3, UTEXT_PROVIDER_HAS_META_DATA = 4,
  UTEXT_PROVIDER_OWNS_TEXT = 5
}
 
enum  {
  UIDNA_ERROR_EMPTY_LABEL =1, UIDNA_ERROR_LABEL_TOO_LONG =2, UIDNA_ERROR_DOMAIN_NAME_TOO_LONG =4, UIDNA_ERROR_LEADING_HYPHEN =8,
  UIDNA_ERROR_TRAILING_HYPHEN =0x10, UIDNA_ERROR_HYPHEN_3_4 =0x20, UIDNA_ERROR_LEADING_COMBINING_MARK =0x40, UIDNA_ERROR_DISALLOWED =0x80,
  UIDNA_ERROR_PUNYCODE =0x100, UIDNA_ERROR_LABEL_HAS_DOT =0x200, UIDNA_ERROR_INVALID_ACE_LABEL =0x400, UIDNA_ERROR_BIDI =0x800,
  UIDNA_ERROR_CONTEXTJ =0x1000, UIDNA_ERROR_CONTEXTO_PUNCTUATION =0x2000, UIDNA_ERROR_CONTEXTO_DIGITS =0x4000, USET_SERIALIZED_STATIC_ARRAY_CAPACITY =8,
  UTEXT_MAGIC = 0x345ad82c
}
 

Functions

U_CAPI UIDNA * uidna_openUTS46 (uint32_t options, UErrorCode *pErrorCode)
 Returns a UIDNA instance which implements UTS #46. More...
 
U_CAPI void uidna_close (UIDNA *idna)
 Closes a UIDNA instance. More...
 
U_CAPI int32_t uidna_labelToASCII (const UIDNA *idna, const UChar *label, int32_t length, UChar *dest, int32_t capacity, UIDNAInfo *pInfo, UErrorCode *pErrorCode)
 Converts a single domain name label into its ASCII form for DNS lookup. More...
 
U_CAPI int32_t uidna_labelToUnicode (const UIDNA *idna, const UChar *label, int32_t length, UChar *dest, int32_t capacity, UIDNAInfo *pInfo, UErrorCode *pErrorCode)
 Converts a single domain name label into its Unicode form for human-readable display. More...
 
U_CAPI int32_t uidna_nameToASCII (const UIDNA *idna, const UChar *name, int32_t length, UChar *dest, int32_t capacity, UIDNAInfo *pInfo, UErrorCode *pErrorCode)
 Converts a whole domain name into its ASCII form for DNS lookup. More...
 
U_CAPI int32_t uidna_nameToUnicode (const UIDNA *idna, const UChar *name, int32_t length, UChar *dest, int32_t capacity, UIDNAInfo *pInfo, UErrorCode *pErrorCode)
 Converts a whole domain name into its Unicode form for human-readable display. More...
 
U_CAPI int32_t uidna_labelToASCII_UTF8 (const UIDNA *idna, const char *label, int32_t length, char *dest, int32_t capacity, UIDNAInfo *pInfo, UErrorCode *pErrorCode)
 Converts a single domain name label into its ASCII form for DNS lookup. More...
 
U_CAPI int32_t uidna_labelToUnicodeUTF8 (const UIDNA *idna, const char *label, int32_t length, char *dest, int32_t capacity, UIDNAInfo *pInfo, UErrorCode *pErrorCode)
 Converts a single domain name label into its Unicode form for human-readable display. More...
 
U_CAPI int32_t uidna_nameToASCII_UTF8 (const UIDNA *idna, const char *name, int32_t length, char *dest, int32_t capacity, UIDNAInfo *pInfo, UErrorCode *pErrorCode)
 Converts a whole domain name into its ASCII form for DNS lookup. More...
 
U_CAPI int32_t uidna_nameToUnicodeUTF8 (const UIDNA *idna, const char *name, int32_t length, char *dest, int32_t capacity, UIDNAInfo *pInfo, UErrorCode *pErrorCode)
 Converts a whole domain name into its Unicode form for human-readable display. More...
 
int32_t uidna_toASCII (const UChar *src, int32_t srcLength, UChar *dest, int32_t destCapacity, int32_t options, UParseError *parseError, UErrorCode *status)
 IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC. More...
 
int32_t uidna_toUnicode (const UChar *src, int32_t srcLength, UChar *dest, int32_t destCapacity, int32_t options, UParseError *parseError, UErrorCode *status)
 IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC. More...
 
int32_t uidna_IDNToASCII (const UChar *src, int32_t srcLength, UChar *dest, int32_t destCapacity, int32_t options, UParseError *parseError, UErrorCode *status)
 IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC. More...
 
int32_t uidna_IDNToUnicode (const UChar *src, int32_t srcLength, UChar *dest, int32_t destCapacity, int32_t options, UParseError *parseError, UErrorCode *status)
 IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC. More...
 
int32_t uidna_compare (const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, int32_t options, UErrorCode *status)
 IDNA2003: Compare two IDN strings for equivalence. More...
 

Detailed Description

C API: Internationalizing Domain Names in Applications (IDNA)

IDNA2008 is implemented according to UTS #46, see the IDNA C++ class in idna.h.

The C API functions which do take a UIDNA * service object pointer implement UTS #46 and IDNA2008.

IDNA2003 is obsolete. The C API functions which do not take a service object pointer implement IDNA2003. They are all deprecated.

Definition in file uidna.h.

Macro Definition Documentation

◆ UIDNA_INFO_INITIALIZER

#define UIDNA_INFO_INITIALIZER
Value:
{ \
(int16_t)sizeof(UIDNAInfo), \
false, false, \
0, 0, 0 }

Static initializer for a UIDNAInfo struct.

Stable:
ICU 4.6

Definition at line 209 of file uidna.h.

Typedef Documentation

◆ UIDNA

typedef struct UIDNA UIDNA

C typedef for struct UIDNA.

Stable:
ICU 4.6

Definition at line 1 of file uidna.h.

◆ UIDNAInfo

typedef struct UIDNAInfo UIDNAInfo

Output container for IDNA processing errors.

Initialize with UIDNA_INFO_INITIALIZER:

UIDNAInfo info = UIDNA_INFO_INITIALIZER;
int32_t length = uidna_nameToASCII(..., &info, &errorCode);
if(U_SUCCESS(errorCode) && info.errors!=0) { ... }
Stable:
ICU 4.6

Enumeration Type Documentation

◆ anonymous enum

anonymous enum
Enumerator
UMSGPAT_ARG_NAME_NOT_NUMBER 

Return value from MessagePattern.validateArgumentName() for when the string is a valid "pattern identifier" but not a number.

Stable:
ICU 4.8
UMSGPAT_ARG_NAME_NOT_VALID 

Return value from MessagePattern.validateArgumentName() for when the string is invalid.

It might not be a valid "pattern identifier", or it might have only ASCII digits but with a leading zero, or the number might be too large.

Stable:
ICU 4.8
UIDNA_DEFAULT 

Default options value: None of the other options are set.

For use in static worker and factory methods.

Stable:
ICU 2.6
UIDNA_ALLOW_UNASSIGNED 

Option to allow unassigned code points in domain names and labels.

For use in static worker and factory methods.

This option is ignored by the UTS46 implementation. (UTS #46 disallows unassigned code points.)

Deprecated:
ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
UIDNA_USE_STD3_RULES 

Option to check whether the input conforms to the STD3 ASCII rules, for example the restriction of labels to LDH characters (ASCII Letters, Digits and Hyphen-Minus).

For use in static worker and factory methods.

Stable:
ICU 2.6
UIDNA_CHECK_BIDI 

IDNA option to check for whether the input conforms to the BiDi rules.

For use in static worker and factory methods.

This option is ignored by the IDNA2003 implementation. (IDNA2003 always performs a BiDi check.)

Stable:
ICU 4.6
UIDNA_CHECK_CONTEXTJ 

IDNA option to check for whether the input conforms to the CONTEXTJ rules.

For use in static worker and factory methods.

This option is ignored by the IDNA2003 implementation. (The CONTEXTJ check is new in IDNA2008.)

Stable:
ICU 4.6
UIDNA_NONTRANSITIONAL_TO_ASCII 

IDNA option for nontransitional processing in ToASCII().

For use in static worker and factory methods.

By default, ToASCII() uses transitional processing.

This option is ignored by the IDNA2003 implementation. (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)

Stable:
ICU 4.6
UIDNA_NONTRANSITIONAL_TO_UNICODE 

IDNA option for nontransitional processing in ToUnicode().

For use in static worker and factory methods.

By default, ToUnicode() uses transitional processing.

This option is ignored by the IDNA2003 implementation. (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)

Stable:
ICU 4.6
UIDNA_CHECK_CONTEXTO 

IDNA option to check for whether the input conforms to the CONTEXTO rules.

For use in static worker and factory methods.

This option is ignored by the IDNA2003 implementation. (The CONTEXTO check is new in IDNA2008.)

This is for use by registries for IDNA2008 conformance. UTS #46 does not require the CONTEXTO check.

Stable:
ICU 49
UITER_UNKNOWN_INDEX 

Constant value that may be returned by UCharIteratorMove indicating that the final UTF-16 index is not known, but that the move succeeded.

This can occur when moving relative to limit or length, or when moving relative to the current index after a setState() when the current UTF-16 index is not known.

It would be very inefficient to have to count from the beginning of the text just to get the current/limit/length index after moving relative to it. The actual index can be determined with getIndex(UITER_CURRENT) which will count the UChars if necessary.

Stable:
ICU 2.6
UNORM_UNICODE_3_2 

Options bit set value to select Unicode 3.2 normalization (except NormalizationCorrections).

At most one Unicode version can be selected at a time.

Deprecated:
ICU 56 Use unorm2.h instead.
USET_IGNORE_SPACE 

Ignore white space within patterns unless quoted or escaped.

Stable:
ICU 2.4
USET_CASE_INSENSITIVE 

Enable case insensitive matching.

E.g., "[ab]" with this flag will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will match all except 'a', 'A', 'b', and 'B'. This performs a full closure over case mappings, e.g. U+017F for s.

The resulting set is a superset of the input for the code points but not for the strings. It performs a case mapping closure of the code points and adds full case folding strings for the code points, and reduces strings of the original set to their full case folding equivalents.

This is designed for case-insensitive matches, for example in regular expressions. The full code point case closure allows checking of an input character directly against the closure set. Strings are matched by comparing the case-folded form from the closure set with an incremental case folding of the string in question.

The closure set will also contain single code points if the original set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.). This is not necessary (that is, redundant) for the above matching method but results in the same closure sets regardless of whether the original set contained the code point or a string.

Stable:
ICU 2.4
USET_ADD_CASE_MAPPINGS 

Enable case insensitive matching.

E.g., "[ab]" with this flag will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will match all except 'a', 'A', 'b', and 'B'. This adds the lower-, title-, and uppercase mappings as well as the case folding of each existing element in the set.

Stable:
ICU 3.2
UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE 

It is potentially time consuming for the provider to determine the length of the text.

Stable:
ICU 3.4
UTEXT_PROVIDER_STABLE_CHUNKS 

Text chunks remain valid and usable until the text object is modified or deleted, not just until the next time the access() function is called (which is the default).

Stable:
ICU 3.4
UTEXT_PROVIDER_WRITABLE 

The provider supports modifying the text via the replace() and copy() functions.

See also
Replaceable
Stable:
ICU 3.4
UTEXT_PROVIDER_HAS_META_DATA 

There is meta data associated with the text.

See also
Replaceable::hasMetaData()
Stable:
ICU 3.4
UTEXT_PROVIDER_OWNS_TEXT 

Text provider owns the text storage.

Generally occurs as the result of a deep clone of the UText. When closing the UText, the associated text must also be closed/deleted/freed/ whatever is appropriate.

Stable:
ICU 3.6

Definition at line 50 of file uidna.h.

◆ anonymous enum

anonymous enum
Enumerator
UIDNA_ERROR_EMPTY_LABEL 

A non-final domain name label (or the whole domain name) is empty.

Stable:
ICU 4.6
UIDNA_ERROR_LABEL_TOO_LONG 

A domain name label is longer than 63 bytes.

(See STD13/RFC1034 3.1. Name space specifications and terminology.) This is only checked in ToASCII operations, and only if the output label is all-ASCII.

Stable:
ICU 4.6
UIDNA_ERROR_DOMAIN_NAME_TOO_LONG 

A domain name is longer than 255 bytes in its storage form.

(See STD13/RFC1034 3.1. Name space specifications and terminology.) This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.

Stable:
ICU 4.6
UIDNA_ERROR_LEADING_HYPHEN 

A label starts with a hyphen-minus ('-').

Stable:
ICU 4.6
UIDNA_ERROR_TRAILING_HYPHEN 

A label ends with a hyphen-minus ('-').

Stable:
ICU 4.6
UIDNA_ERROR_HYPHEN_3_4 

A label contains hyphen-minus ('-') in the third and fourth positions.

Stable:
ICU 4.6
UIDNA_ERROR_LEADING_COMBINING_MARK 

A label starts with a combining mark.

Stable:
ICU 4.6
UIDNA_ERROR_DISALLOWED 

A label or domain name contains disallowed characters.

Stable:
ICU 4.6
UIDNA_ERROR_PUNYCODE 

A label starts with "xn--" but does not contain valid Punycode.

That is, an xn-- label failed Punycode decoding.

Stable:
ICU 4.6
UIDNA_ERROR_LABEL_HAS_DOT 

A label contains a dot (full stop).

This can occur in an input string for a single-label function.

Stable:
ICU 4.6
UIDNA_ERROR_INVALID_ACE_LABEL 

An ACE label does not contain a valid label string.

The label was successfully ACE (Punycode) decoded but the resulting string had severe validation errors. For example, it might contain characters that are not allowed in ACE labels, or it might not be normalized.

Stable:
ICU 4.6
UIDNA_ERROR_BIDI 

A label does not meet the IDNA BiDi requirements (for right-to-left characters).

Stable:
ICU 4.6
UIDNA_ERROR_CONTEXTJ 

A label does not meet the IDNA CONTEXTJ requirements.

Stable:
ICU 4.6
UIDNA_ERROR_CONTEXTO_PUNCTUATION 

A label does not meet the IDNA CONTEXTO requirements for punctuation characters.

Some punctuation characters "Would otherwise have been DISALLOWED" but are allowed in certain contexts. (RFC 5892)

Stable:
ICU 49
UIDNA_ERROR_CONTEXTO_DIGITS 

A label does not meet the IDNA CONTEXTO requirements for digits.

Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).

Stable:
ICU 49
USET_SERIALIZED_STATIC_ARRAY_CAPACITY 

Capacity of USerializedSet::staticArray.

Enough for any single-code point set. Also provides padding for nice sizeof(USerializedSet).

Stable:
ICU 2.4

Definition at line 425 of file uidna.h.

Function Documentation

◆ uidna_close()

U_CAPI void uidna_close ( UIDNA * idna)

Closes a UIDNA instance.

Parameters
idna: UIDNA instance to be closed
Stable:
ICU 4.6

◆ uidna_compare()

int32_t uidna_compare ( const UChar * s1,
int32_t  length1,
const UChar * s2,
int32_t  length2,
int32_t  options,
UErrorCode * status 
)

IDNA2003: Compare two IDN strings for equivalence.

This function splits the domain names into labels and compares them. According to the IDN RFC, whenever two labels are compared, they are considered equal if and only if their ASCII forms (obtained by applying toASCII) match using a case-insensitive ASCII comparison. Two domain names are considered a match if and only if all labels match regardless of whether label separators match.

Parameters
s1: First source string.
length1: Length of first source string, or -1 if NUL-terminated.
s2: Second source string.
length2: Length of second source string, or -1 if NUL-terminated.
options: A bit set of options:
  • UIDNA_DEFAULT: Use default options, i.e., do not process unassigned code points and do not use STD3 ASCII rules. If unassigned code points are found, the operation fails with the U_IDNA_UNASSIGNED_ERROR error code.
  • UIDNA_ALLOW_UNASSIGNED: Unassigned values can be converted to ASCII for query operations. If this option is set, the unassigned code points in the input are treated as normal Unicode code points.
  • UIDNA_USE_STD3_RULES: Use STD3 ASCII rules for host name syntax restrictions. If this option is set and the input does not satisfy STD3 rules, the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
Parameters
status: ICU error code in/out parameter. Must fulfill U_SUCCESS before the function call.
Returns
<0 or 0 or >0 as usual for string comparisons
Deprecated:
ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.

◆ uidna_IDNToASCII()

int32_t uidna_IDNToASCII ( const UChar * src,
int32_t  srcLength,
UChar * dest,
int32_t  destCapacity,
int32_t  options,
UParseError * parseError,
UErrorCode * status 
)

IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.

This operation is done on complete domain names, e.g., "www.example.com". It is important to note that this operation can fail. If it fails, then the input domain name cannot be used as an Internationalized Domain Name and the application should have methods defined to deal with the failure.

Note: IDNA RFC specifies that a conformant application should divide a domain name into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, and then convert. This function does not offer that level of granularity. The options, once set, will apply to all labels in the domain name.

Parameters
src: Input UChar array containing IDN in Unicode.
srcLength: Number of UChars in src, or -1 if NUL-terminated.
dest: Output UChar array with ASCII (ACE encoded) IDN.
destCapacity: Size of dest.
options: A bit set of options:
  • UIDNA_DEFAULT: Use default options, i.e., do not process unassigned code points and do not use STD3 ASCII rules. If unassigned code points are found, the operation fails with the U_IDNA_UNASSIGNED_ERROR error code.
  • UIDNA_ALLOW_UNASSIGNED: Unassigned values can be converted to ASCII for query operations. If this option is set, the unassigned code points in the input are treated as normal Unicode code points.
  • UIDNA_USE_STD3_RULES: Use STD3 ASCII rules for host name syntax restrictions. If this option is set and the input does not satisfy STD3 rules, the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
Parameters
parseError: Pointer to UParseError struct to receive information on position of error if an error is encountered. Can be NULL.
status: ICU in/out error code parameter. U_INVALID_CHAR_FOUND if src contains unmatched single surrogates. U_INDEX_OUTOFBOUNDS_ERROR if src contains too many code points. U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough.
Returns
The length of the result string, if successful - or in case of a buffer overflow, in which case it will be greater than destCapacity.
Deprecated:
ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.

◆ uidna_IDNToUnicode()

int32_t uidna_IDNToUnicode ( const UChar * src,
int32_t  srcLength,
UChar * dest,
int32_t  destCapacity,
int32_t  options,
UParseError * parseError,
UErrorCode * status 
)

IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.

This operation is done on complete domain names, e.g., "www.example.com".

Note: IDNA RFC specifies that a conformant application should divide a domain name into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, and then convert. This function does not offer that level of granularity. The options, once set, will apply to all labels in the domain name.

Parameters
src: Input UChar array containing IDN in ASCII (ACE encoded) form.
srcLength: Number of UChars in src, or -1 if NUL-terminated.
dest: Output UChar array containing Unicode equivalent of source IDN.
destCapacity: Size of dest.
options: A bit set of options:
  • UIDNA_DEFAULT: Use default options, i.e., do not process unassigned code points and do not use STD3 ASCII rules. If unassigned code points are found, the operation fails with the U_IDNA_UNASSIGNED_ERROR error code.
  • UIDNA_ALLOW_UNASSIGNED: Unassigned values can be converted to ASCII for query operations. If this option is set, the unassigned code points in the input are treated as normal Unicode code points.
  • UIDNA_USE_STD3_RULES: Use STD3 ASCII rules for host name syntax restrictions. If this option is set and the input does not satisfy STD3 rules, the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
Parameters
parseError: Pointer to UParseError struct to receive information on position of error if an error is encountered. Can be NULL.
status: ICU in/out error code parameter. U_INVALID_CHAR_FOUND if src contains unmatched single surrogates. U_INDEX_OUTOFBOUNDS_ERROR if src contains too many code points. U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough.
Returns
The length of the result string, if successful - or in case of a buffer overflow, in which case it will be greater than destCapacity.
Deprecated:
ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.

◆ uidna_labelToASCII()

U_CAPI int32_t uidna_labelToASCII ( const UIDNA * idna,
const UChar * label,
int32_t  length,
UChar * dest,
int32_t  capacity,
UIDNAInfo * pInfo,
UErrorCode * pErrorCode 
)

Converts a single domain name label into its ASCII form for DNS lookup.

If any processing step fails, then pInfo->errors will be non-zero and the result might not be an ASCII string. The label might be modified according to the types of errors. Labels with severe errors will be left in (or turned into) their Unicode form.

The UErrorCode indicates an error only in exceptional cases, such as a U_MEMORY_ALLOCATION_ERROR.

Parameters
idna: UIDNA instance
label: Input domain name label
length: Label length, or -1 if NUL-terminated
dest: Destination string buffer
capacity: Destination buffer capacity
pInfo: Output container of IDNA processing details.
pErrorCode: Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
destination string length
Stable:
ICU 4.6

◆ uidna_labelToASCII_UTF8()

U_CAPI int32_t uidna_labelToASCII_UTF8 ( const UIDNA * idna,
const char *  label,
int32_t  length,
char *  dest,
int32_t  capacity,
UIDNAInfo * pInfo,
UErrorCode * pErrorCode 
)

Converts a single domain name label into its ASCII form for DNS lookup.

UTF-8 version of uidna_labelToASCII(), same behavior.

Parameters
idna: UIDNA instance
label: Input domain name label
length: Label length, or -1 if NUL-terminated
dest: Destination string buffer
capacity: Destination buffer capacity
pInfo: Output container of IDNA processing details.
pErrorCode: Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
destination string length
Stable:
ICU 4.6

◆ uidna_labelToUnicode()

U_CAPI int32_t uidna_labelToUnicode ( const UIDNA * idna,
const UChar * label,
int32_t  length,
UChar * dest,
int32_t  capacity,
UIDNAInfo * pInfo,
UErrorCode * pErrorCode 
)

Converts a single domain name label into its Unicode form for human-readable display.

If any processing step fails, then pInfo->errors will be non-zero. The label might be modified according to the types of errors.

The UErrorCode indicates an error only in exceptional cases, such as a U_MEMORY_ALLOCATION_ERROR.

Parameters
idna: UIDNA instance
label: Input domain name label
length: Label length, or -1 if NUL-terminated
dest: Destination string buffer
capacity: Destination buffer capacity
pInfo: Output container of IDNA processing details.
pErrorCode: Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
destination string length
Stable:
ICU 4.6

◆ uidna_labelToUnicodeUTF8()

U_CAPI int32_t uidna_labelToUnicodeUTF8 ( const UIDNA * idna,
const char *  label,
int32_t  length,
char *  dest,
int32_t  capacity,
UIDNAInfo * pInfo,
UErrorCode * pErrorCode 
)

Converts a single domain name label into its Unicode form for human-readable display.

UTF-8 version of uidna_labelToUnicode(), same behavior.

Parameters
idna: UIDNA instance
label: Input domain name label
length: Label length, or -1 if NUL-terminated
dest: Destination string buffer
capacity: Destination buffer capacity
pInfo: Output container of IDNA processing details.
pErrorCode: Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
destination string length
Stable:
ICU 4.6

◆ uidna_nameToASCII()

U_CAPI int32_t uidna_nameToASCII ( const UIDNA * idna,
const UChar * name,
int32_t  length,
UChar * dest,
int32_t  capacity,
UIDNAInfo * pInfo,
UErrorCode * pErrorCode 
)

Converts a whole domain name into its ASCII form for DNS lookup.

If any processing step fails, then pInfo->errors will be non-zero and the result might not be an ASCII string. The domain name might be modified according to the types of errors. Labels with severe errors will be left in (or turned into) their Unicode form.

The UErrorCode indicates an error only in exceptional cases, such as a U_MEMORY_ALLOCATION_ERROR.

Parameters
idna: UIDNA instance
name: Input domain name
length: Domain name length, or -1 if NUL-terminated
dest: Destination string buffer
capacity: Destination buffer capacity
pInfo: Output container of IDNA processing details.
pErrorCode: Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
destination string length
Stable:
ICU 4.6

◆ uidna_nameToASCII_UTF8()

U_CAPI int32_t uidna_nameToASCII_UTF8 ( const UIDNA * idna,
const char *  name,
int32_t  length,
char *  dest,
int32_t  capacity,
UIDNAInfo * pInfo,
UErrorCode * pErrorCode 
)

Converts a whole domain name into its ASCII form for DNS lookup.

UTF-8 version of uidna_nameToASCII(), same behavior.

Parameters
idna: UIDNA instance
name: Input domain name
length: Domain name length, or -1 if NUL-terminated
dest: Destination string buffer
capacity: Destination buffer capacity
pInfo: Output container of IDNA processing details.
pErrorCode: Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
destination string length
Stable:
ICU 4.6

◆ uidna_nameToUnicode()

U_CAPI int32_t uidna_nameToUnicode ( const UIDNA * idna,
const UChar * name,
int32_t  length,
UChar * dest,
int32_t  capacity,
UIDNAInfo * pInfo,
UErrorCode * pErrorCode 
)

Converts a whole domain name into its Unicode form for human-readable display.

If any processing step fails, then pInfo->errors will be non-zero. The domain name might be modified according to the types of errors.

The UErrorCode indicates an error only in exceptional cases, such as a U_MEMORY_ALLOCATION_ERROR.

Parameters
idna: UIDNA instance
name: Input domain name
length: Domain name length, or -1 if NUL-terminated
dest: Destination string buffer
capacity: Destination buffer capacity
pInfo: Output container of IDNA processing details.
pErrorCode: Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
destination string length
Stable:
ICU 4.6

◆ uidna_nameToUnicodeUTF8()

U_CAPI int32_t uidna_nameToUnicodeUTF8 ( const UIDNA * idna,
const char *  name,
int32_t  length,
char *  dest,
int32_t  capacity,
UIDNAInfo * pInfo,
UErrorCode * pErrorCode 
)

Converts a whole domain name into its Unicode form for human-readable display.

UTF-8 version of uidna_nameToUnicode(), same behavior.

Parameters
idna: UIDNA instance
name: Input domain name
length: Domain name length, or -1 if NUL-terminated
dest: Destination string buffer
capacity: Destination buffer capacity
pInfo: Output container of IDNA processing details.
pErrorCode: Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
destination string length
Stable:
ICU 4.6

◆ uidna_openUTS46()

U_CAPI UIDNA* uidna_openUTS46 ( uint32_t  options,
UErrorCode * pErrorCode 
)

Returns a UIDNA instance which implements UTS #46.

Returns an unmodifiable instance, owned by the caller. Cache it for multiple operations, and uidna_close() it when done. The instance is thread-safe, that is, it can be used concurrently.

For details about the UTS #46 implementation see the IDNA C++ class in idna.h.

Parameters
options: Bit set to modify the processing and error checking. See option bit set values in uidna.h.
pErrorCode: Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
the UTS #46 UIDNA instance, if successful
Stable:
ICU 4.6

◆ uidna_toASCII()

int32_t uidna_toASCII ( const UChar * src,
int32_t  srcLength,
UChar * dest,
int32_t  destCapacity,
int32_t  options,
UParseError * parseError,
UErrorCode * status 
)

IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.

This operation is done on single labels before sending it to something that expects ASCII names. A label is an individual part of a domain name. Labels are usually separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".

IDNA2003 API Overview:

The uidna_ API implements the IDNA protocol as defined in the IDNA RFC (http://www.ietf.org/rfc/rfc3490.txt). The RFC defines 2 operations: ToASCII and ToUnicode. Domain name labels containing non-ASCII code points are processed by the ToASCII operation before passing it to resolver libraries. Domain names that are obtained from resolver libraries are processed by the ToUnicode operation before displaying the domain name to the user. IDNA requires that implementations process input strings with Nameprep (http://www.ietf.org/rfc/rfc3491.txt), which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt), and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt). Implementations of IDNA MUST fully implement Nameprep and Punycode; neither Nameprep nor Punycode are optional. The input and output of ToASCII and ToUnicode operations are Unicode and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations multiple times to an input string will yield the same result as applying the operation once: ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string) and ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).

Parameters
src: Input UChar array containing label in Unicode.
srcLength: Number of UChars in src, or -1 if NUL-terminated.
dest: Output UChar array with ASCII (ACE encoded) label.
destCapacity: Size of dest.
options: A bit set of options:
  • UIDNA_DEFAULT: Use default options, i.e., do not process unassigned code points and do not use STD3 ASCII rules. If unassigned code points are found, the operation fails with the U_IDNA_UNASSIGNED_ERROR error code.
  • UIDNA_ALLOW_UNASSIGNED: Unassigned values can be converted to ASCII for query operations. If this option is set, the unassigned code points in the input are treated as normal Unicode code points.
  • UIDNA_USE_STD3_RULES: Use STD3 ASCII rules for host name syntax restrictions. If this option is set and the input does not satisfy STD3 rules, the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
Parameters
parseError: Pointer to UParseError struct to receive information on position of error if an error is encountered. Can be NULL.
status: ICU in/out error code parameter. U_INVALID_CHAR_FOUND if src contains unmatched single surrogates. U_INDEX_OUTOFBOUNDS_ERROR if src contains too many code points. U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough.
Returns
The length of the result string, if successful - or in case of a buffer overflow, in which case it will be greater than destCapacity.
Deprecated:
ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.

◆ uidna_toUnicode()

int32_t uidna_toUnicode ( const UChar * src,
int32_t  srcLength,
UChar * dest,
int32_t  destCapacity,
int32_t  options,
UParseError * parseError,
UErrorCode * status 
)

IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.

This operation is done on single labels before sending it to something that expects Unicode names. A label is an individual part of a domain name. Labels are usually separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".

Parameters
src: Input UChar array containing ASCII (ACE encoded) label.
srcLength: Number of UChars in src, or -1 if NUL-terminated.
dest: Output Converted UChar array containing Unicode equivalent of label.
destCapacity: Size of dest.
options: A bit set of options:
  • UIDNA_DEFAULT: Use default options, i.e., do not process unassigned code points and do not use STD3 ASCII rules. If unassigned code points are found, the operation fails with the U_IDNA_UNASSIGNED_ERROR error code.
  • UIDNA_ALLOW_UNASSIGNED: Unassigned values can be converted to ASCII for query operations. If this option is set, the unassigned code points in the input are treated as normal Unicode code points. Note: This option is required on toUnicode operation because the RFC mandates verification of decoded ACE input by applying toASCII and comparing its output with source.
  • UIDNA_USE_STD3_RULES: Use STD3 ASCII rules for host name syntax restrictions. If this option is set and the input does not satisfy STD3 rules, the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
Parameters
parseError: Pointer to UParseError struct to receive information on position of error if an error is encountered. Can be NULL.
status: ICU in/out error code parameter. U_INVALID_CHAR_FOUND if src contains unmatched single surrogates. U_INDEX_OUTOFBOUNDS_ERROR if src contains too many code points. U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough.
Returns
The length of the result string, if successful - or in case of a buffer overflow, in which case it will be greater than destCapacity.
Deprecated:
ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
UIDNA_INFO_INITIALIZER
#define UIDNA_INFO_INITIALIZER
Static initializer for a UIDNAInfo struct.
Definition: uidna.h:209
uidna_nameToASCII
U_CAPI int32_t uidna_nameToASCII(const UIDNA *idna, const UChar *name, int32_t length, UChar *dest, int32_t capacity, UIDNAInfo *pInfo, UErrorCode *pErrorCode)
Converts a whole domain name into its ASCII form for DNS lookup.
U_SUCCESS
#define U_SUCCESS(x)
Does the error code indicate success?
Definition: utypes.h:714
UIDNAInfo
Output container for IDNA processing errors.
Definition: uidna.h:185
UIDNAInfo::errors
uint32_t errors
Bit set indicating IDNA processing errors.
Definition: uidna.h:200