// Written in the D programming language.

/++
    Functions which operate on ASCII characters.

    All of the functions in std._ascii accept Unicode characters but
    effectively ignore them if they're not ASCII. All $(D isX) functions return
    $(D false) for non-ASCII characters, and all $(D toX) functions do nothing
    to non-ASCII characters.

    For functions which operate on Unicode characters, see
    $(MREF std, uni).

$(SCRIPT inhibitQuickIndex = 1;)
$(DIVC quickindex,
$(BOOKTABLE,
$(TR $(TH Category) $(TH Functions))
$(TR $(TD Validation) $(TD
        $(LREF isAlpha)
        $(LREF isAlphaNum)
        $(LREF isASCII)
        $(LREF isControl)
        $(LREF isDigit)
        $(LREF isGraphical)
        $(LREF isHexDigit)
        $(LREF isLower)
        $(LREF isOctalDigit)
        $(LREF isPrintable)
        $(LREF isPunctuation)
        $(LREF isUpper)
        $(LREF isWhite)
))
$(TR $(TD Conversions) $(TD
        $(LREF toLower)
        $(LREF toUpper)
))
$(TR $(TD Constants) $(TD
        $(LREF digits)
        $(LREF fullHexDigits)
        $(LREF hexDigits)
        $(LREF letters)
        $(LREF lowercase)
        $(LREF lowerHexDigits)
        $(LREF newline)
        $(LREF octalDigits)
        $(LREF uppercase)
        $(LREF whitespace)
))
$(TR $(TD Enums) $(TD
        $(LREF LetterCase)
))
))
    References:
        $(LINK2 http://www.digitalmars.com/d/ascii-table.html, ASCII Table),
        $(HTTP en.wikipedia.org/wiki/Ascii, Wikipedia)

    License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
    Authors:   $(HTTP digitalmars.com, Walter Bright) and Jonathan M Davis
    Source:    $(PHOBOSSRC std/_ascii.d)
  +/
module std.ascii;

version (unittest)
{
    // FIXME: When dmd bug #314 is fixed, make these selective.
    import std.meta; // : AliasSeq;
    import std.range; // : chain;
    import std.traits; // : functionAttributes, FunctionAttribute, isSafe;
}


immutable fullHexDigits  = "0123456789ABCDEFabcdef";  /// 0 .. 9, A .. F, a .. f
immutable hexDigits      = fullHexDigits[0 .. 16];    /// 0 .. 9, A .. F
immutable lowerHexDigits = "0123456789abcdef";        /// 0 .. 9, a .. f
immutable digits         = hexDigits[0 .. 10];        /// 0 .. 9
immutable octalDigits    = digits[0 .. 8];            /// 0 .. 7
immutable letters        = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; /// A .. Z, a .. z
immutable uppercase      = letters[0 .. 26];          /// A .. Z
immutable lowercase      = letters[26 .. 52];         /// a .. z
immutable whitespace     = " \t\v\r\n\f";             /// ASCII _whitespace

/++
    Letter case specifier.
  +/
enum LetterCase : bool
{
    upper, /// Upper case letters
    lower  /// Lower case letters
}

/// Newline sequence for this system.
version(Windows)
    immutable newline = "\r\n";
else version(Posix)
    immutable newline = "\n";
else
    static assert(0, "Unsupported OS");


/++
    Params: c = The character to test.
    Returns: Whether $(D c) is a letter or a number (0 .. 9, a .. z, A .. Z).
  +/
bool isAlphaNum(dchar c) @safe pure nothrow @nogc
{
    // Once c is known to lie in '0' .. 'z', anything <= '9' is a digit and
    // anything >= 'a' is a lowercase letter; only the uppercase range needs
    // both of its bounds checked.
    return c <= 'z' && c >= '0' && (c <= '9' || c >= 'a' || (c >= 'A' && c <= 'Z'));
}

///
@safe pure nothrow @nogc unittest
{
    assert( isAlphaNum('A'));
    assert( isAlphaNum('1'));
    assert(!isAlphaNum('#'));

    // N.B.: does not return true for non-ASCII Unicode alphanumerics:
    assert(!isAlphaNum('á'));
}

@safe unittest
{
    foreach (c; chain(digits, octalDigits, fullHexDigits, letters, lowercase, uppercase))
        assert(isAlphaNum(c));

    foreach (c; whitespace)
        assert(!isAlphaNum(c));
}


/++
    Params: c = The character to test.
    Returns: Whether $(D c) is an ASCII letter (A .. Z, a .. z).
  +/
bool isAlpha(dchar c) @safe pure nothrow @nogc
{
    // Optimizer can turn this into a bitmask operation on 64 bit code
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}

///
@safe pure nothrow @nogc unittest
{
    assert( isAlpha('A'));
    assert(!isAlpha('1'));
    assert(!isAlpha('#'));

    // N.B.: does not return true for non-ASCII Unicode alphabetic characters:
    assert(!isAlpha('á'));
}

@safe unittest
{
    foreach (c; chain(letters, lowercase, uppercase))
        assert(isAlpha(c));

    foreach (c; chain(digits, octalDigits, whitespace))
        assert(!isAlpha(c));
}


/++
    Params: c = The character to test.
    Returns: Whether $(D c) is a lowercase ASCII letter (a .. z).
  +/
bool isLower(dchar c) @safe pure nothrow @nogc
{
    return c >= 'a' && c <= 'z';
}

///
@safe pure nothrow @nogc unittest
{
    assert( isLower('a'));
    assert(!isLower('A'));
    assert(!isLower('#'));

    // N.B.: does not return true for non-ASCII Unicode lowercase letters
    assert(!isLower('á'));
    assert(!isLower('Á'));
}

@safe unittest
{
    foreach (c; lowercase)
        assert(isLower(c));

    foreach (c; chain(digits, uppercase, whitespace))
        assert(!isLower(c));
}


/++
    Params: c = The character to test.
    Returns: Whether $(D c) is an uppercase ASCII letter (A .. Z).
  +/
bool isUpper(dchar c) @safe pure nothrow @nogc
{
    return c <= 'Z' && 'A' <= c;
}

///
@safe pure nothrow @nogc unittest
{
    assert( isUpper('A'));
    assert(!isUpper('a'));
    assert(!isUpper('#'));

    // N.B.: does not return true for non-ASCII Unicode uppercase letters
    assert(!isUpper('á'));
    assert(!isUpper('Á'));
}

@safe unittest
{
    foreach (c; uppercase)
        assert(isUpper(c));

    foreach (c; chain(digits, lowercase, whitespace))
        assert(!isUpper(c));
}


/++
    Params: c = The character to test.
    Returns: Whether $(D c) is a digit (0 .. 9).
  +/
bool isDigit(dchar c) @safe pure nothrow @nogc
{
    return '0' <= c && c <= '9';
}

///
@safe pure nothrow @nogc unittest
{
    assert( isDigit('3'));
    assert( isDigit('8'));
    assert(!isDigit('B'));
    assert(!isDigit('#'));

    // N.B.: does not return true for non-ASCII Unicode numbers.
    // Escapes are used so the literals cannot be silently normalized to
    // their ASCII look-alikes.
    assert(!isDigit('\uFF10')); // full-width digit zero (U+FF10)
    assert(!isDigit('\uFF14')); // full-width digit four (U+FF14)
}

@safe unittest
{
    foreach (c; digits)
        assert(isDigit(c));

    foreach (c; chain(letters, whitespace))
        assert(!isDigit(c));
}


/++
    Params: c = The character to test.
    Returns: Whether $(D c) is a digit in base 8 (0 .. 7).
  +/
bool isOctalDigit(dchar c) @safe pure nothrow @nogc
{
    return c >= '0' && c <= '7';
}

///
@safe pure nothrow @nogc unittest
{
    assert( isOctalDigit('0'));
    assert( isOctalDigit('7'));
    assert(!isOctalDigit('8'));
    assert(!isOctalDigit('A'));
    assert(!isOctalDigit('#'));
}

@safe unittest
{
    foreach (c; octalDigits)
        assert(isOctalDigit(c));

    foreach (c; chain(letters, ['8', '9'], whitespace))
        assert(!isOctalDigit(c));
}


/++
    Params: c = The character to test.
    Returns: Whether $(D c) is a digit in base 16 (0 .. 9, A .. F, a .. f).
  +/
bool isHexDigit(dchar c) @safe pure nothrow @nogc
{
    // Once c is known to lie in '0' .. 'f', anything <= '9' is a decimal
    // digit and anything >= 'a' is a lowercase hex letter; only A .. F needs
    // both of its bounds checked.
    return c <= 'f' && c >= '0' && (c <= '9' || c >= 'a' || (c >= 'A' && c <= 'F'));
}

///
@safe pure nothrow @nogc unittest
{
    assert( isHexDigit('0'));
    assert( isHexDigit('A'));
    assert( isHexDigit('f')); // lowercase hex digits are accepted
    assert(!isHexDigit('g'));
    assert(!isHexDigit('G'));
    assert(!isHexDigit('#'));
}

@safe unittest
{
    foreach (c; fullHexDigits)
        assert(isHexDigit(c));

    foreach (c; chain(lowercase[6 .. $], uppercase[6 .. $], whitespace))
        assert(!isHexDigit(c));
}


/++
    Params: c = The character to test.
    Returns: Whether or not $(D c) is a whitespace character. That includes the
    space, tab, vertical tab, form feed, carriage return, and linefeed
    characters.
  +/
bool isWhite(dchar c) @safe pure nothrow @nogc
{
    // 0x09 .. 0x0D is the contiguous run: tab, linefeed, vertical tab,
    // form feed, carriage return.
    return c == ' ' || (c >= 0x09 && c <= 0x0D);
}

///
@safe pure nothrow @nogc unittest
{
    assert( isWhite(' '));
    assert( isWhite('\t'));
    assert( isWhite('\n'));
    assert(!isWhite('1'));
    assert(!isWhite('a'));
    assert(!isWhite('#'));

    // N.B.: Does not return true for non-ASCII Unicode whitespace characters.
    static import std.uni;
    assert(std.uni.isWhite('\u00A0'));
    assert(!isWhite('\u00A0')); // std.ascii.isWhite
}

@safe unittest
{
    foreach (c; whitespace)
        assert(isWhite(c));

    foreach (c; chain(digits, letters))
        assert(!isWhite(c));
}


/++
    Params: c = The character to test.
    Returns: Whether $(D c) is a control character.
  +/
bool isControl(dchar c) @safe pure nothrow @nogc
{
    // Everything below space, plus DEL (0x7F).
    return c < 0x20 || c == 0x7F;
}

///
@safe pure nothrow @nogc unittest
{
    assert( isControl('\0'));
    assert( isControl('\022'));
    assert( isControl('\n')); // newline is both whitespace and control
    assert(!isControl(' '));
    assert(!isControl('1'));
    assert(!isControl('a'));
    assert(!isControl('#'));

    // N.B.: non-ASCII Unicode control characters are not recognized:
    assert(!isControl('\u0080'));
    assert(!isControl('\u2028'));
    assert(!isControl('\u2029'));
}

@safe unittest
{
    foreach (dchar c; 0 .. 32)
        assert(isControl(c));
    assert(isControl(127));

    foreach (c; chain(digits, letters, [' ']))
        assert(!isControl(c));
}


/++
    Params: c = The character to test.
    Returns: Whether or not $(D c) is a punctuation character. That includes
    all ASCII characters which are not control characters, letters, digits, or
    whitespace.
  +/
bool isPunctuation(dchar c) @safe pure nothrow @nogc
{
    // '!' .. '~' is every ASCII character that is neither a control
    // character nor the space; drop the letters and digits from that range.
    return c <= '~' && c >= '!' && !isAlphaNum(c);
}

///
@safe pure nothrow @nogc unittest
{
    assert( isPunctuation('.'));
    assert( isPunctuation(','));
    assert( isPunctuation(':'));
    assert( isPunctuation('!'));
    assert( isPunctuation('#'));
    assert( isPunctuation('~'));
    assert( isPunctuation('+'));
    assert( isPunctuation('_'));

    assert(!isPunctuation('1'));
    assert(!isPunctuation('a'));
    assert(!isPunctuation(' '));
    assert(!isPunctuation('\n'));
    assert(!isPunctuation('\0'));

    // N.B.: Non-ASCII Unicode punctuation characters are not recognized.
    assert(!isPunctuation('\u2012')); // (U+2012 = en-dash)
}

@safe unittest
{
    foreach (dchar c; 0 .. 128)
    {
        if (isControl(c) || isAlphaNum(c) || c == ' ')
            assert(!isPunctuation(c));
        else
            assert(isPunctuation(c));
    }
}


/++
    Params: c = The character to test.
    Returns: Whether or not $(D c) is a printable character other than the
    space character.
  +/
bool isGraphical(dchar c) @safe pure nothrow @nogc
{
    return '!' <= c && c <= '~';
}

///
@safe pure nothrow @nogc unittest
{
    assert( isGraphical('1'));
    assert( isGraphical('a'));
    assert( isGraphical('#'));
    assert(!isGraphical(' ')); // whitespace is not graphical
    assert(!isGraphical('\n'));
    assert(!isGraphical('\0'));

    // N.B.: Unicode graphical characters are not regarded as such.
    assert(!isGraphical('á'));
}

@safe unittest
{
    foreach (dchar c; 0 .. 128)
    {
        if (isControl(c) || c == ' ')
            assert(!isGraphical(c));
        else
            assert(isGraphical(c));
    }
}


/++
    Params: c = The character to test.
    Returns: Whether or not $(D c) is a printable character - including the
    space character.
  +/
bool isPrintable(dchar c) @safe pure nothrow @nogc
{
    return c >= ' ' && c <= '~';
}

///
@safe pure nothrow @nogc unittest
{
    assert( isPrintable(' '));  // whitespace is printable
    assert( isPrintable('1'));
    assert( isPrintable('a'));
    assert( isPrintable('#'));
    assert(!isPrintable('\0')); // control characters are not printable

    // N.B.: Printable non-ASCII Unicode characters are not recognized.
    assert(!isPrintable('á'));
}

@safe unittest
{
    foreach (dchar c; 0 .. 128)
    {
        if (isControl(c))
            assert(!isPrintable(c));
        else
            assert(isPrintable(c));
    }
}


/++
    Params: c = The character to test.
    Returns: Whether or not $(D c) is in the ASCII character set - i.e. in the
    range 0 .. 0x7F.
  +/
pragma(inline, true)
bool isASCII(dchar c) @safe pure nothrow @nogc
{
    return c <= 0x7F;
}

///
@safe pure nothrow @nogc unittest
{
    assert( isASCII('a'));
    assert(!isASCII('á'));
}

@safe unittest
{
    foreach (dchar c; 0 .. 128)
        assert(isASCII(c));

    assert(!isASCII(128));
}


/++
    Converts an ASCII letter to lowercase.

    Params: c = A character of any type that implicitly converts to $(D dchar).
    In the case where it's a built-in type, or an enum of a built-in type,
    $(D Unqual!(OriginalType!C)) is returned, whereas if it's a user-defined
    type, $(D dchar) is returned.

    Returns: The corresponding lowercase letter, if $(D c) is an uppercase
    ASCII character, otherwise $(D c) itself.
  +/
auto toLower(C)(C c)
if (is(C : dchar))
{
    import std.traits : isAggregateType, OriginalType, Unqual;

    // R is the result type: dchar for user-defined (aggregate) types,
    // otherwise the unqualified underlying built-in type.
    alias OC = OriginalType!C;
    static if (isAggregateType!OC)
        alias R = dchar;
    else
        alias R = Unqual!OC;

    // 'a' - 'A' is the fixed distance between the two cases; the outer cast
    // converts the result of the arithmetic back to R.
    return isUpper(c) ? cast(R)(cast(R) c + 'a' - 'A') : cast(R) c;
}

///
@safe pure nothrow @nogc unittest
{
    assert(toLower('a') == 'a');
    assert(toLower('A') == 'a');
    assert(toLower('#') == '#');

    // N.B.: Non-ASCII Unicode uppercase letters are not converted.
    assert(toLower('Á') == 'Á');
}

@safe pure nothrow unittest
{

    foreach (C; AliasSeq!(char, wchar, dchar, immutable char, ubyte))
    {
        foreach (i, c; uppercase)
            assert(toLower(cast(C) c) == lowercase[i]);

        foreach (C c; 0 .. 128)
        {
            if (c < 'A' || c > 'Z')
                assert(toLower(c) == c);
            else
                assert(toLower(c) != c);
        }

        foreach (C c; 128 .. C.max)
            assert(toLower(c) == c);

        //CTFE
        static assert(toLower(cast(C)'a') == 'a');
        static assert(toLower(cast(C)'A') == 'a');
    }
}


/++
    Converts an ASCII letter to uppercase.

    Params: c = Any type which implicitly converts to $(D dchar). In the case
    where it's a built-in type, or an enum of a built-in type,
    $(D Unqual!(OriginalType!C)) is returned, whereas if it's a user-defined
    type, $(D dchar) is returned.

    Returns: The corresponding uppercase letter, if $(D c) is a lowercase ASCII
    character, otherwise $(D c) itself.
  +/
auto toUpper(C)(C c)
if (is(C : dchar))
{
    import std.traits : isAggregateType, OriginalType, Unqual;

    // R is the result type: dchar for user-defined (aggregate) types,
    // otherwise the unqualified underlying built-in type.
    alias OC = OriginalType!C;
    static if (isAggregateType!OC)
        alias R = dchar;
    else
        alias R = Unqual!OC;

    // 'a' - 'A' is the fixed distance between the two cases; the outer cast
    // converts the result of the arithmetic back to R.
    return isLower(c) ? cast(R)(cast(R) c - ('a' - 'A')) : cast(R) c;
}

///
@safe pure nothrow @nogc unittest
{
    assert(toUpper('a') == 'A');
    assert(toUpper('A') == 'A');
    assert(toUpper('#') == '#');

    // N.B.: Non-ASCII Unicode lowercase letters are not converted.
    assert(toUpper('á') == 'á');
}

@safe pure nothrow unittest
{
    foreach (C; AliasSeq!(char, wchar, dchar, immutable char, ubyte))
    {
        foreach (i, c; lowercase)
            assert(toUpper(cast(C) c) == uppercase[i]);

        foreach (C c; 0 .. 128)
        {
            if (c < 'a' || c > 'z')
                assert(toUpper(c) == c);
            else
                assert(toUpper(c) != c);
        }

        foreach (C c; 128 .. C.max)
            assert(toUpper(c) == c);

        //CTFE
        static assert(toUpper(cast(C)'a') == 'A');
        static assert(toUpper(cast(C)'A') == 'A');
    }
}


@safe unittest //Test both toUpper and toLower with non-builtin
{
    //User Defined [Char|Wchar|Dchar]
    static struct UDC {  char c; alias c this; }
    static struct UDW { wchar c; alias c this; }
    static struct UDD { dchar c; alias c this; }
    //[Char|Wchar|Dchar] Enum
    enum CE :  char {a = 'a', A = 'A'}
    enum WE : wchar {a = 'a', A = 'A'}
    enum DE : dchar {a = 'a', A = 'A'}
    //User Defined [Char|Wchar|Dchar] Enum
    enum UDCE : UDC {a = UDC('a'), A = UDC('A')}
    enum UDWE : UDW {a = UDW('a'), A = UDW('A')}
    enum UDDE : UDD {a = UDD('a'), A = UDD('A')}

    //User defined types with implicit cast to dchar test.
    foreach (Char; AliasSeq!(UDC, UDW, UDD))
    {
        assert(toLower(Char('a')) == 'a');
        assert(toLower(Char('A')) == 'a');
        static assert(toLower(Char('a')) == 'a');
        static assert(toLower(Char('A')) == 'a');
        static assert(toUpper(Char('a')) == 'A');
        static assert(toUpper(Char('A')) == 'A');
    }

    //Various enum tests.
    foreach (Enum; AliasSeq!(CE, WE, DE, UDCE, UDWE, UDDE))
    {
        assert(toLower(Enum.a) == 'a');
        assert(toLower(Enum.A) == 'a');
        assert(toUpper(Enum.a) == 'A');
        assert(toUpper(Enum.A) == 'A');
        static assert(toLower(Enum.a) == 'a');
        static assert(toLower(Enum.A) == 'a');
        static assert(toUpper(Enum.a) == 'A');
        static assert(toUpper(Enum.A) == 'A');
    }

    //Return value type tests for enum of non-UDT. These should be the original type.
    foreach (T; AliasSeq!(CE, WE, DE))
    {
        alias C = OriginalType!T;
        static assert(is(typeof(toLower(T.init)) == C));
        static assert(is(typeof(toUpper(T.init)) == C));
    }

    //Return value tests for UDT and enum of UDT. These should be dchar
    foreach (T; AliasSeq!(UDC, UDW, UDD, UDCE, UDWE, UDDE))
    {
        static assert(is(typeof(toLower(T.init)) == dchar));
        static assert(is(typeof(toUpper(T.init)) == dchar));
    }
}