ELinks 0.16.1.1
charsets.c File Reference
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include "elinks.h"
#include "document/options.h"
#include "intl/charsets.h"
#include "util/conv.h"
#include "util/error.h"
#include "util/fastfind.h"
#include "util/hash.h"
#include "util/memory.h"
#include "util/string.h"
#include "osdep/osdep.h"
#include "intl/codepage.inc"
#include "intl/uni_7b.inc"
#include "intl/entity.inc"
Include dependency graph for charsets.c:

Data Structures

struct  table_entry
struct  codepage_desc
struct  entity_cache

Macros

#define _GNU_SOURCE   /* strcasecmp() */
#define table   table_elinks
#define BIN_SEARCH(table, entry, entries, key, result)
#define SYSTEM_CHARSET_FLAG   128
#define is_cp_ptr_utf8(cp_ptr)
#define utf8_issingle(p)
#define utf8_islead(p)
#define ENTITY_CACHE_SIZE   10 /* 10 seems a good value. */
#define ENTITY_CACHE_MAXLEN
#define PUTC

Functions

NONSTATIC_INLINE char * encode_utf8 (unicode_val_T u)
NONSTATIC_INLINE int utf8charlen (const char *p)
NONSTATIC_INLINE unicode_val_T utf8_to_unicode (char **string, const char *end)
static void free_translation_table (struct conv_table *p)
static void new_translation_table (struct conv_table *p)
static int is_codepoint_supported (unicode_val_T u)
int codepoint_replacement (unicode_val_T u)
const char * u2cp_ (unicode_val_T u, int to, enum nbsp_mode nbsp_mode)
int strlen_utf8 (char **str)
char * utf8_prevchar (char *current, int pos, char *start)
int utf8_char2cells (const char *utf8_char_const, char *end)
int utf8_ptr2cells (const char *string, char *end)
int utf8_ptr2chars (char *string, char *end)
int utf8_cells2bytes (const char *string, int max_cells, char *end)
char * utf8_step_forward (char *string, char *end, int max, enum utf8_step way, int *count)
char * utf8_step_backward (char *string, char *start, int max, enum utf8_step way, int *count)
unicode_val_T unicode_fold_label_case (unicode_val_T c)
static unicode_val_T cp2u_shared (const struct codepage_desc *from, unsigned char c)
unicode_val_T cp2u (int from, unsigned char c)
const char * cp2utf8 (int from, int c)
unicode_val_T cp_to_unicode (int codepage, char **string, const char *end)
static void add_utf8 (struct conv_table *ct, unicode_val_T u, const char *str)
static void free_utf_table (void)
static struct conv_table * get_translation_table_to_utf8 (int from)
void free_conv_table (void)
struct conv_table * get_translation_table (int from, int to)
static int xxstrcmp (const char *s1, const char *s2, int l2)
static int hits_cmp (const void *v1, const void *v2)
static int compare_entities (const void *key_, const void *element_)
const char * get_entity_string (const char *str, const int strlen, int encoding)
char * convert_string (struct conv_table *convert_table, const char *chars2, int charslen2, int cp, enum convert_string_mode mode, int *length, void(*callback)(void *data, char *buf, int buflen), void *callback_data)
void charsets_list_reset (void)
struct fastfind_key_value * charsets_list_next (void)
int get_cp_index (const char *name)
void make_codepoints (void)
void init_charsets_lookup (void)
void free_charsets_lookup (void)
const char * get_cp_name (int cp_index)
const char * get_cp_config_name (int cp_index)
const char * get_cp_mime_name (int cp_index)
int is_cp_utf8 (int cp_index)
const uint16_t * get_cp_highhalf (const char *name)

Variables

static const char strings [256][2]
static const char no_str [] = "*"
struct { 
   int   size 
   unicode_val_T *   list 
} codepoints
static const unicode_val_T strange_chars [32]
static char utf_buffer [7]
static const char utf8char_len_tab [256]
struct conv_table utf_table [256]
int utf_table_init = 1
static struct conv_table table [256]
static int first = 1
static unsigned int i_name = 0
static unsigned int i_alias = 0
static struct fastfind_index ff_charsets_index = INIT_FASTFIND_INDEX("charsets_lookup", charsets_list_reset, charsets_list_next)

Macro Definition Documentation

◆ _GNU_SOURCE

#define _GNU_SOURCE   /* strcasecmp() */

◆ BIN_SEARCH

#define BIN_SEARCH ( table,
entry,
entries,
key,
result )
Value:
{ \
long _s = 0, _e = (entries) - 1; \
\
while (_s <= _e || !((result) = -1)) { \
long _m = (_s + _e) / 2; \
\
if ((table)[_m].entry == (key)) { \
(result) = _m; \
break; \
} \
if ((table)[_m].entry > (key)) _e = _m - 1; \
if ((table)[_m].entry < (key)) _s = _m + 1; \
} \
} \
#define table
Definition kbdbind.c:27

◆ ENTITY_CACHE_MAXLEN

#define ENTITY_CACHE_MAXLEN
Value:
9 /* entities with length >= ENTITY_CACHE_MAXLEN or == 1
will go in [0] table */

◆ ENTITY_CACHE_SIZE

#define ENTITY_CACHE_SIZE   10 /* 10 seems a good value. */

◆ is_cp_ptr_utf8

#define is_cp_ptr_utf8 ( cp_ptr)
Value:
((cp_ptr)->aliases == aliases_utf8)

◆ PUTC

#define PUTC
Value:
do { \
buffer[bufferpos++] = chars[charspos++]; \
translit = ""; \
goto flush; \
} while (0)

◆ SYSTEM_CHARSET_FLAG

#define SYSTEM_CHARSET_FLAG   128

◆ table

#define table   table_elinks

◆ utf8_islead

#define utf8_islead ( p)
Value:
(utf8_issingle(p) || ((p) & 0xc0) == 0xc0)
#define utf8_issingle(p)
Definition charsets.c:378

◆ utf8_issingle

#define utf8_issingle ( p)
Value:
(((p) & 0x80) == 0)

Function Documentation

◆ add_utf8()

void add_utf8 ( struct conv_table * ct,
unicode_val_T u,
const char * str )
static

◆ charsets_list_next()

struct fastfind_key_value * charsets_list_next ( void )

◆ charsets_list_reset()

void charsets_list_reset ( void )

◆ codepoint_replacement()

int codepoint_replacement ( unicode_val_T u)

◆ compare_entities()

int compare_entities ( const void * key_,
const void * element_ )
static

◆ convert_string()

char * convert_string ( struct conv_table * convert_table,
const char * chars2,
int charslen2,
int cp,
enum convert_string_mode mode,
int * length,
void(* callback )(void *data, char *buf, int buflen),
void * callback_data )

◆ cp2u()

unicode_val_T cp2u ( int from,
unsigned char c )

◆ cp2u_shared()

unicode_val_T cp2u_shared ( const struct codepage_desc * from,
unsigned char c )
static

◆ cp2utf8()

const char * cp2utf8 ( int from,
int c )

◆ cp_to_unicode()

unicode_val_T cp_to_unicode ( int codepage,
char ** string,
const char * end )

◆ encode_utf8()

NONSTATIC_INLINE char * encode_utf8 ( unicode_val_T u)

◆ free_charsets_lookup()

void free_charsets_lookup ( void )

◆ free_conv_table()

void free_conv_table ( void )

◆ free_translation_table()

void free_translation_table ( struct conv_table * p)
static

◆ free_utf_table()

void free_utf_table ( void )
static

◆ get_cp_config_name()

const char * get_cp_config_name ( int cp_index)

◆ get_cp_highhalf()

const uint16_t * get_cp_highhalf ( const char * name)

◆ get_cp_index()

int get_cp_index ( const char * name)

◆ get_cp_mime_name()

const char * get_cp_mime_name ( int cp_index)

◆ get_cp_name()

const char * get_cp_name ( int cp_index)

◆ get_entity_string()

const char * get_entity_string ( const char * str,
const int strlen,
int encoding )

◆ get_translation_table()

struct conv_table * get_translation_table ( int from,
int to )

◆ get_translation_table_to_utf8()

struct conv_table * get_translation_table_to_utf8 ( int from)
static

◆ hits_cmp()

int hits_cmp ( const void * v1,
const void * v2 )
static

◆ init_charsets_lookup()

void init_charsets_lookup ( void )

◆ is_codepoint_supported()

int is_codepoint_supported ( unicode_val_T u)
static

◆ is_cp_utf8()

int is_cp_utf8 ( int cp_index)

◆ make_codepoints()

void make_codepoints ( void )

◆ new_translation_table()

void new_translation_table ( struct conv_table * p)
static

◆ strlen_utf8()

int strlen_utf8 ( char ** str)

◆ u2cp_()

const char * u2cp_ ( unicode_val_T u,
int to,
enum nbsp_mode nbsp_mode  )

◆ unicode_fold_label_case()

unicode_val_T unicode_fold_label_case ( unicode_val_T c)

◆ utf8_cells2bytes()

int utf8_cells2bytes ( const char * string,
int max_cells,
char * end )

◆ utf8_char2cells()

int utf8_char2cells ( const char * utf8_char_const,
char * end )

◆ utf8_prevchar()

char * utf8_prevchar ( char * current,
int pos,
char * start )

◆ utf8_ptr2cells()

int utf8_ptr2cells ( const char * string,
char * end )

◆ utf8_ptr2chars()

int utf8_ptr2chars ( char * string,
char * end )

◆ utf8_step_backward()

char * utf8_step_backward ( char * string,
char * start,
int max,
enum utf8_step way,
int * count )

◆ utf8_step_forward()

char * utf8_step_forward ( char * string,
char * end,
int max,
enum utf8_step way,
int * count )

◆ utf8_to_unicode()

NONSTATIC_INLINE unicode_val_T utf8_to_unicode ( char ** string,
const char * end )

◆ utf8charlen()

NONSTATIC_INLINE int utf8charlen ( const char * p)

◆ xxstrcmp()

int xxstrcmp ( const char * s1,
const char * s2,
int l2 )
inline static

Variable Documentation

◆ [struct]

struct { ... } codepoints

◆ ff_charsets_index

struct fastfind_index ff_charsets_index = INIT_FASTFIND_INDEX("charsets_lookup", charsets_list_reset, charsets_list_next)
static

◆ first

int first = 1
static

◆ i_alias

unsigned int i_alias = 0
static

◆ i_name

unsigned int i_name = 0
static

◆ list

unicode_val_T * list

◆ no_str

const char no_str[] = "*"
static

◆ size

int size

◆ strange_chars

const unicode_val_T strange_chars[32]
static
Initial value:
= {
0x20ac, 0x0000, 0x002a, 0x0000, 0x201e, 0x2026, 0x2020, 0x2021,
0x005e, 0x2030, 0x0160, 0x003c, 0x0152, 0x0000, 0x0000, 0x0000,
0x0000, 0x0060, 0x0027, 0x0022, 0x0022, 0x002a, 0x2013, 0x2014,
0x007e, 0x2122, 0x0161, 0x003e, 0x0153, 0x0000, 0x0000, 0x0000,
}

◆ strings

const char strings[256][2]
static

◆ table

struct conv_table table[256]
static

◆ utf8char_len_tab

const char utf8char_len_tab[256]
static
Initial value:
= {
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 5,5,5,5,6,6,1,1,
}

◆ utf_buffer

char utf_buffer[7]
static

◆ utf_table

struct conv_table utf_table[256]

◆ utf_table_init

int utf_table_init = 1