RESTinio
utf8_checker.hpp
Go to the documentation of this file.
1 /*
2  * RESTinio
3  */
4 
12 #pragma once
13 
15 
16 #include <cstdint>
17 
18 namespace restinio
19 {
20 
21 namespace utils
22 {
23 
24 //
25 // utf8_checker_t
26 //
27 
/*!
 * @brief Helper class for checking UTF-8 byte sequences while parsing
 * a URI or an incoming byte stream.
 *
 * Bytes are fed one at a time to process_byte(). The checker keeps the
 * code point decoded so far, the number of continuation bytes still
 * expected and a validity state.
 *
 * A sequence is rejected when:
 * - a lead byte matches none of the recognised lead-byte masks;
 * - a byte other than 10xxxxxx arrives while continuation bytes are
 *   expected;
 * - the encoding is overlong (lead bytes 0xC0/0xC1, or 0xE0/0xF0/0xF8/0xFC
 *   followed by a continuation byte that carries no payload bits in
 *   the high positions);
 * - the decoded code point is a surrogate (0xD800..0xDFFF) or is
 *   greater than 0x10FFFF.
 *
 * NOTE(review): the generated index for this file shows process_byte(),
 * finalized() and current_symbol() marked with RESTINIO_NODISCARD. The
 * include that defines that macro is not visible here, so the marker is
 * not used below -- confirm and restore it together with the include.
 */
class utf8_checker_t
{
public:
	utf8_checker_t() = default;

	/*!
	 * @brief Process the next byte of the stream.
	 *
	 * @param byte The next byte of the input.
	 *
	 * @return true if the bytes seen so far can still form valid UTF-8
	 * (this includes a sequence that is not finished yet), false
	 * otherwise.
	 */
	bool
	process_byte( std::uint8_t byte ) noexcept
	{
		// Must run first: it inspects the raw byte before the payload
		// bits are masked out below.
		check_overlong( byte );

		if( m_current_symbol_rest_bytes > 0 )
		{
			// check byte is 10xxxxxx.
			if( (byte & 0xC0) == 0x80 )
			{
				// Append the 6 payload bits to the code point.
				m_current_symbol <<= 6;
				byte &= 0x3F;

				m_current_symbol |= byte;

				if( --m_current_symbol_rest_bytes == 0 )
				{
					// The code point is complete, check its range.
					validate_current_symbol();
				}
			}
			else
			{
				m_state = state_t::invalid;
			}
		}
		else
		{
			m_current_symbol = 0;

			if( (byte & 0x80) == 0x00)
			{
				// mask 0xxxxxxx
				m_current_symbol_rest_bytes = 0;
			}
			else if( (byte & 0xE0) == 0xC0)
			{
				// mask 110xxxxx
				m_current_symbol_rest_bytes = 1;
				byte &= 0x1F;
			}
			else if( (byte & 0xF0) == 0xE0)
			{
				// mask 1110xxxx
				m_current_symbol_rest_bytes = 2;
				byte &= 0xF;
			}
			else if( (byte & 0xF8) == 0xF0)
			{
				// mask 11110xxx
				m_current_symbol_rest_bytes = 3;
				byte &= 0x7;
			}
			else if( (byte & 0xFC) == 0xF8)
			{
				// mask 111110xx
				// Such a sequence can only be overlong or decode to a
				// value above 0x10FFFF, so it is rejected later on.
				m_current_symbol_rest_bytes = 4;
				byte &= 0x3;
			}
			else if( (byte & 0xFE) == 0xFC)
			{
				// mask 1111110x
				// Same as above: rejected once the sequence is decoded.
				m_current_symbol_rest_bytes = 5;
				byte &= 0x1;
			}
			else
			{
				// A stray continuation byte (10xxxxxx), 0xFE or 0xFF.
				m_state = state_t::invalid;
			}

			m_current_symbol = byte;
		}

		return m_state == state_t::valid ||
				m_state == state_t::may_be_overlong;
	}

	/*!
	 * @return true if no continuation bytes are pending, i.e. the
	 * current sequence is finalized.
	 */
	bool
	finalized() const noexcept
	{
		return m_current_symbol_rest_bytes == 0;
	}

	/*!
	 * @brief Return the checker to its initial state.
	 *
	 * The validity state is cleared together with the decoded symbol
	 * and the pending-byte counter. Otherwise a leftover
	 * may_be_overlong/invalid state would affect the next sequence.
	 */
	void
	reset() noexcept
	{
		m_current_symbol = 0;
		m_current_symbol_rest_bytes = 0;
		m_state = state_t::valid;
	}

	/*!
	 * @return The code point accumulated so far. The value is complete
	 * only when finalized() returns true.
	 */
	std::uint32_t
	current_symbol() const noexcept { return m_current_symbol; }

private:

	//! Reject surrogates and values outside of the Unicode range.
	void
	validate_current_symbol() noexcept
	{
		if( (m_current_symbol >= 0xD800 && m_current_symbol <= 0xDFFF) ||
			(m_current_symbol >= 0x110000) )
		{
			m_state = state_t::invalid;
		}
	}

	/*!
	 * @brief Track overlong encodings.
	 *
	 * A lead byte with all payload bits equal to zero (0xE0, 0xF0, 0xF8,
	 * 0xFC) is only suspicious: the verdict depends on the first
	 * continuation byte. 0xC0 and 0xC1 are overlong on their own.
	 */
	void
	check_overlong( std::uint8_t byte ) noexcept
	{
		if( m_current_symbol_rest_bytes > 0 &&
				m_state == state_t::may_be_overlong )
		{
			// The first continuation byte after a suspicious lead byte.
			// The sequence is overlong if the high payload bits of
			// this byte are zero too.
			if( m_current_symbol_rest_bytes == 2 &&
					(byte & 0xE0) == 0x80 )
				m_state = state_t::overlong;
			else if( m_current_symbol_rest_bytes == 3 &&
					(byte & 0xF0) == 0x80 )
				m_state = state_t::overlong;
			else if( m_current_symbol_rest_bytes == 4 &&
					(byte & 0xF8) == 0x80 )
				m_state = state_t::overlong;
			else if( m_current_symbol_rest_bytes == 5 &&
					(byte & 0xFC) == 0x80 )
				m_state = state_t::overlong;
			else
				m_state = state_t::valid;
		}
		else
		{
			if( byte == 0xC0 || byte == 0xC1 )
			{
				// A two-byte form of a value that fits into one byte.
				m_state = state_t::overlong;
			}
			else if( byte == 0xE0 || byte == 0xF0 ||
					byte == 0xF8 || byte == 0xFC )
			{
				m_state = state_t::may_be_overlong;
			}
		}
	}

	//! The code point decoded so far.
	std::uint32_t m_current_symbol = 0u;

	//! How many continuation bytes are expected for the current symbol.
	std::size_t m_current_symbol_rest_bytes = 0u;

	enum class state_t
	{
		//! No errors detected so far.
		valid,
		//! A malformed sequence or an unacceptable code point was found.
		invalid,
		//! The lead byte has no payload bits set; the next byte decides.
		may_be_overlong,
		//! An overlong encoding was found.
		overlong
	};

	//! The current verdict about the processed bytes.
	state_t m_state = state_t::valid;
};
208 
209 } /* namespace utils */
210 
211 } /* namespace restinio */
212 
restinio::utils::utf8_checker_t::current_symbol
RESTINIO_NODISCARD std::uint32_t current_symbol() const noexcept
Definition: utf8_checker.hpp:134
RESTINIO_NODISCARD
#define RESTINIO_NODISCARD
Definition: compiler_features.hpp:33
restinio::utils::utf8_checker_t::m_state
state_t m_state
Definition: utf8_checker.hpp:206
restinio::utils::utf8_checker_t::state_t::valid
@ valid
restinio::utils::utf8_checker_t::m_current_symbol_rest_bytes
std::size_t m_current_symbol_rest_bytes
Definition: utf8_checker.hpp:196
restinio::utils::utf8_checker_t::m_current_symbol
std::uint32_t m_current_symbol
Definition: utf8_checker.hpp:194
restinio::utils::utf8_checker_t::state_t::may_be_overlong
@ may_be_overlong
restinio::utils::utf8_checker_t::state_t::overlong
@ overlong
restinio::utils::utf8_checker_t::process_byte
RESTINIO_NODISCARD bool process_byte(std::uint8_t byte) noexcept
Definition: utf8_checker.hpp:41
restinio::utils::utf8_checker_t::state_t::invalid
@ invalid
restinio::utils::utf8_checker_t::finalized
RESTINIO_NODISCARD bool finalized() const noexcept
Definition: utf8_checker.hpp:120
restinio::utils::utf8_checker_t::validate_current_symbol
void validate_current_symbol() noexcept
Definition: utf8_checker.hpp:139
restinio::utils::utf8_checker_t
Helper class for checking UTF-8 byte sequences while parsing a URI or an incoming byte stream.
Definition: utf8_checker.hpp:35
restinio::utils::utf8_checker_t::state_t
state_t
Definition: utf8_checker.hpp:199
restinio::utils::utf8_checker_t::reset
void reset() noexcept
Definition: utf8_checker.hpp:126
restinio::utils::sha1::details::byte
unsigned int byte(digest_t::value_type v)
Definition: sha1.hpp:365
restinio
Definition: asio_include.hpp:21
compiler_features.hpp
Detection of compiler version and absence of various features.
restinio::utils::utf8_checker_t::check_overlong
void check_overlong(std::uint8_t byte) noexcept
Definition: utf8_checker.hpp:149
const
#define const
Definition: zconf.h:230
restinio::utils::utf8_checker_t::utf8_checker_t
utf8_checker_t()=default