001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.openstreetmap.josm.data.validation.routines;
018
019import java.util.Arrays;
020import java.util.Locale;
021
022/**
023 * <p><b>Domain name</b> validation routines.</p>
024 *
025 * <p>
026 * This validator provides methods for validating Internet domain names
027 * and top-level domains.
028 * </p>
029 *
030 * <p>Domain names are evaluated according
031 * to the standards <a href="http://www.ietf.org/rfc/rfc1034.txt">RFC1034</a>,
032 * section 3, and <a href="http://www.ietf.org/rfc/rfc1123.txt">RFC1123</a>,
 * section 2.1. No accommodation is provided for the specialized needs of
034 * other applications; if the domain name has been URL-encoded, for example,
035 * validation will fail even though the equivalent plaintext version of the
036 * same name would have passed.
037 * </p>
038 *
039 * <p>
040 * Validation is also provided for top-level domains (TLDs) as defined and
041 * maintained by the Internet Assigned Numbers Authority (IANA):
042 * </p>
043 *
044 *   <ul>
045 *     <li>{@link #isValidInfrastructureTld} - validates infrastructure TLDs
046 *         (<code>.arpa</code>, etc.)</li>
047 *     <li>{@link #isValidGenericTld} - validates generic TLDs
048 *         (<code>.com, .org</code>, etc.)</li>
049 *     <li>{@link #isValidIdnTld} - validates IDN TLDs
050 *         (<code>.xn--*</code>, etc.)</li>
051 *     <li>{@link #isValidCountryCodeTld} - validates country code TLDs
052 *         (<code>.us, .uk, .cn</code>, etc.)</li>
053 *   </ul>
054 *
055 * <p>
056 * (<b>NOTE</b>: This class does not provide IP address lookup for domain names or
057 * methods to ensure that a given domain name matches a specific IP; see
058 * {@link java.net.InetAddress} for that functionality.)
059 * </p>
060 *
061 * @version $Revision: 1640271 $ $Date: 2014-11-18 02:32:15 2014 UTC (Tue, 18 Nov 2014) $
062 * @since Validator 1.4
063 */
064public final class DomainValidator extends AbstractValidator {
065
066    // Regular expression strings for hostnames (derived from RFC2396 and RFC 1123)
067    private static final String DOMAIN_LABEL_REGEX = "\\p{Alnum}(?>[\\p{Alnum}-]*\\p{Alnum})*";
068    private static final String TOP_LABEL_REGEX = "\\p{Alpha}{2,}";
069    // JOSM PATCH BEGIN
070    // See #10862 - IDN TLDs in ASCII form
071    private static final String TOP_LABEL_IDN_REGEX = "(?:xn|XN)--\\p{Alnum}{2,}(?:-\\p{Alpha}{2,})?";
072    private static final String DOMAIN_NAME_REGEX =
073            "^(?:" + DOMAIN_LABEL_REGEX + "\\.)+" + "(" + TOP_LABEL_REGEX + "|" + TOP_LABEL_IDN_REGEX + ")$";
074    // JOSM PATCH END
075
076    private final boolean allowLocal;
077
078    /**
079     * Singleton instance of this validator, which
080     *  doesn't consider local addresses as valid.
081     */
082    private static final DomainValidator DOMAIN_VALIDATOR = new DomainValidator(false);
083
084    /**
085     * Singleton instance of this validator, which does
086     *  consider local addresses valid.
087     */
088    private static final DomainValidator DOMAIN_VALIDATOR_WITH_LOCAL = new DomainValidator(true);
089
090    /**
091     * RegexValidator for matching domains.
092     */
093    private final RegexValidator domainRegex =
094            new RegexValidator(DOMAIN_NAME_REGEX);
095    /**
096     * RegexValidator for matching the a local hostname
097     */
098    private final RegexValidator hostnameRegex =
099            new RegexValidator(DOMAIN_LABEL_REGEX);
100
101    /**
102     * Returns the singleton instance of this validator. It
103     *  will not consider local addresses as valid.
104     * @return the singleton instance of this validator
105     */
106    public static DomainValidator getInstance() {
107        return DOMAIN_VALIDATOR;
108    }
109
110    /**
111     * Returns the singleton instance of this validator,
112     *  with local validation as required.
113     * @param allowLocal Should local addresses be considered valid?
114     * @return the singleton instance of this validator
115     */
116    public static DomainValidator getInstance(boolean allowLocal) {
117       if (allowLocal) {
118          return DOMAIN_VALIDATOR_WITH_LOCAL;
119       }
120       return DOMAIN_VALIDATOR;
121    }
122
123    /**
124     * Private constructor.
125     * @param allowLocal whether to allow local domains
126     */
127    private DomainValidator(boolean allowLocal) {
128       this.allowLocal = allowLocal;
129    }
130
131    /**
132     * Returns true if the specified <code>String</code> parses
133     * as a valid domain name with a recognized top-level domain.
134     * The parsing is case-sensitive.
135     * @param domain the parameter to check for domain name syntax
136     * @return true if the parameter is a valid domain name
137     */
138    @Override
139    public boolean isValid(String domain) {
140        String[] groups = domainRegex.match(domain);
141        if (groups != null && groups.length > 0) {
142            return isValidTld(groups[0]);
143        } else if (allowLocal) {
144            if (hostnameRegex.isValid(domain)) {
145               return true;
146            }
147        }
148        return false;
149    }
150
151    /**
152     * Returns true if the specified <code>String</code> matches any
153     * IANA-defined top-level domain. Leading dots are ignored if present.
154     * The search is case-sensitive.
155     * @param tld the parameter to check for TLD status
156     * @return true if the parameter is a TLD
157     */
158    public boolean isValidTld(String tld) {
159        if (allowLocal && isValidLocalTld(tld)) {
160           return true;
161        }
162        return isValidInfrastructureTld(tld)
163                || isValidGenericTld(tld)
164                || isValidIdnTld(tld)
165                || isValidCountryCodeTld(tld);
166    }
167
168    /**
169     * Returns true if the specified <code>String</code> matches any
170     * IANA-defined infrastructure top-level domain. Leading dots are
171     * ignored if present. The search is case-sensitive.
172     * @param iTld the parameter to check for infrastructure TLD status
173     * @return true if the parameter is an infrastructure TLD
174     */
175    public boolean isValidInfrastructureTld(String iTld) {
176        return Arrays.binarySearch(INFRASTRUCTURE_TLDS, chompLeadingDot(iTld.toLowerCase(Locale.ENGLISH))) >= 0;
177    }
178
179    /**
180     * Returns true if the specified <code>String</code> matches any
181     * IANA-defined generic top-level domain. Leading dots are ignored
182     * if present. The search is case-sensitive.
183     * @param gTld the parameter to check for generic TLD status
184     * @return true if the parameter is a generic TLD
185     */
186    public boolean isValidGenericTld(String gTld) {
187        return Arrays.binarySearch(GENERIC_TLDS, chompLeadingDot(gTld.toLowerCase(Locale.ENGLISH))) >= 0;
188    }
189
190    /**
191     * Returns true if the specified <code>String</code> matches any
192     * IANA-defined IDN top-level domain. Leading dots are ignored
193     * if present. The search is case-sensitive.
194     * @param iTld the parameter to check for IDN TLD status
195     * @return true if the parameter is an IDN TLD
196     */
197    public boolean isValidIdnTld(String iTld) {
198        return Arrays.binarySearch(IDN_TLDS, chompLeadingDot(iTld.toUpperCase(Locale.ENGLISH))) >= 0;
199    }
200
201    /**
202     * Returns true if the specified <code>String</code> matches any
203     * IANA-defined country code top-level domain. Leading dots are
204     * ignored if present. The search is case-sensitive.
205     * @param ccTld the parameter to check for country code TLD status
206     * @return true if the parameter is a country code TLD
207     */
208    public boolean isValidCountryCodeTld(String ccTld) {
209        return Arrays.binarySearch(COUNTRY_CODE_TLDS, chompLeadingDot(ccTld.toLowerCase(Locale.ENGLISH))) >= 0;
210    }
211
212    /**
213     * Returns true if the specified <code>String</code> matches any
214     * widely used "local" domains (localhost or localdomain). Leading dots are
215     *  ignored if present. The search is case-sensitive.
216     * @param iTld the parameter to check for local TLD status
217     * @return true if the parameter is an local TLD
218     */
219    public boolean isValidLocalTld(String iTld) {
220        return Arrays.binarySearch(LOCAL_TLDS, chompLeadingDot(iTld.toLowerCase(Locale.ENGLISH))) >= 0;
221    }
222
223    private static String chompLeadingDot(String str) {
224        if (str.startsWith(".")) {
225            return str.substring(1);
226        } else {
227            return str;
228        }
229    }
230
231    // ---------------------------------------------
232    // ----- TLDs defined by IANA
233    // ----- Authoritative and comprehensive list at:
234    // ----- http://data.iana.org/TLD/tlds-alpha-by-domain.txt
235
236    private static final String[] INFRASTRUCTURE_TLDS = new String[] {
237        "arpa",               // internet infrastructure
238        "root"                // diagnostic marker for non-truncated root zone
239    };
240
241    private static final String[] GENERIC_TLDS = new String[] {
242        "abogado",
243        "academy",
244        "accountants",
245        "active",
246        "actor",
247        "aero",
248        "agency",
249        "airforce",
250        "allfinanz",
251        "alsace",
252        "archi",
253        "army",
254        "arpa",
255        "asia",
256        "associates",
257        "attorney",
258        "auction",
259        "audio",
260        "autos",
261        "axa",
262        "band",
263        "bar",
264        "bargains",
265        "bayern",
266        "beer",
267        "berlin",
268        "best",
269        "bid",
270        "bike",
271        "bio",
272        "biz",
273        "black",
274        "blackfriday",
275        "blue",
276        "bmw",
277        "bnpparibas",
278        "boo",
279        "boutique",
280        "brussels",
281        "budapest",
282        "build",
283        "builders",
284        "business",
285        "buzz",
286        "bzh",
287        "cab",
288        "cal",
289        "camera",
290        "camp",
291        "cancerresearch",
292        "capetown",
293        "capital",
294        "caravan",
295        "cards",
296        "care",
297        "career",
298        "careers",
299        "casa",
300        "cash",
301        "cat",
302        "catering",
303        "center",
304        "ceo",
305        "cern",
306        "channel",
307        "cheap",
308        "christmas",
309        "chrome",
310        "church",
311        "citic",
312        "city",
313        "claims",
314        "cleaning",
315        "click",
316        "clinic",
317        "clothing",
318        "club",
319        "codes",
320        "coffee",
321        "college",
322        "cologne",
323        "com",
324        "community",
325        "company",
326        "computer",
327        "condos",
328        "construction",
329        "consulting",
330        "contractors",
331        "cooking",
332        "cool",
333        "coop",
334        "country",
335        "credit",
336        "creditcard",
337        "crs",
338        "cruises",
339        "cuisinella",
340        "cymru",
341        "dad",
342        "dance",
343        "dating",
344        "day",
345        "deals",
346        "degree",
347        "democrat",
348        "dental",
349        "dentist",
350        "desi",
351        "diamonds",
352        "diet",
353        "digital",
354        "direct",
355        "directory",
356        "discount",
357        "dnp",
358        "domains",
359        "durban",
360        "dvag",
361        "eat",
362        "edu",
363        "education",
364        "email",
365        "engineer",
366        "engineering",
367        "enterprises",
368        "equipment",
369        "esq",
370        "estate",
371        "eus",
372        "events",
373        "exchange",
374        "expert",
375        "exposed",
376        "fail",
377        "farm",
378        "feedback",
379        "finance",
380        "financial",
381        "fish",
382        "fishing",
383        "fitness",
384        "flights",
385        "florist",
386        "flsmidth",
387        "fly",
388        "foo",
389        "forsale",
390        "foundation",
391        "frl",
392        "frogans",
393        "fund",
394        "furniture",
395        "futbol",
396        "gal",
397        "gallery",
398        "gbiz",
399        "gent",
400        "gift",
401        "gifts",
402        "gives",
403        "glass",
404        "gle",
405        "global",
406        "globo",
407        "gmail",
408        "gmo",
409        "gmx",
410        "google",
411        "gop",
412        "gov",
413        "graphics",
414        "gratis",
415        "green",
416        "gripe",
417        "guide",
418        "guitars",
419        "guru",
420        "hamburg",
421        "haus",
422        "healthcare",
423        "help",
424        "here",
425        "hiphop",
426        "hiv",
427        "holdings",
428        "holiday",
429        "homes",
430        "horse",
431        "host",
432        "hosting",
433        "house",
434        "how",
435        "ibm",
436        "immo",
437        "immobilien",
438        "industries",
439        "info",
440        "ing",
441        "ink",
442        "institute",
443        "insure",
444        "int",
445        "international",
446        "investments",
447        "jetzt",
448        "jobs",
449        "joburg",
450        "juegos",
451        "kaufen",
452        "kim",
453        "kitchen",
454        "kiwi",
455        "koeln",
456        "krd",
457        "kred",
458        "lacaixa",
459        "land",
460        "lawyer",
461        "lease",
462        "lgbt",
463        "life",
464        "lighting",
465        "limited",
466        "limo",
467        "link",
468        "loans",
469        "london",
470        "lotto",
471        "ltda",
472        "luxe",
473        "luxury",
474        "maison",
475        "management",
476        "mango",
477        "market",
478        "marketing",
479        "media",
480        "meet",
481        "melbourne",
482        "meme",
483        "menu",
484        "miami",
485        "mil",
486        "mini",
487        "mobi",
488        "moda",
489        "moe",
490        "monash",
491        "mortgage",
492        "moscow",
493        "motorcycles",
494        "mov",
495        "museum",
496        "nagoya",
497        "name",
498        "navy",
499        "net",
500        "network",
501        "neustar",
502        "new",
503        "nexus",
504        "ngo",
505        "nhk",
506        "ninja",
507        "nra",
508        "nrw",
509        "nyc",
510        "okinawa",
511        "ong",
512        "onl",
513        "ooo",
514        "org",
515        "organic",
516        "otsuka",
517        "ovh",
518        "paris",
519        "partners",
520        "parts",
521        "pharmacy",
522        "photo",
523        "photography",
524        "photos",
525        "physio",
526        "pics",
527        "pictures",
528        "pink",
529        "pizza",
530        "place",
531        "plumbing",
532        "pohl",
533        "poker",
534        "post",
535        "praxi",
536        "press",
537        "pro",
538        "prod",
539        "productions",
540        "prof",
541        "properties",
542        "property",
543        "pub",
544        "qpon",
545        "quebec",
546        "realtor",
547        "recipes",
548        "red",
549        "rehab",
550        "reise",
551        "reisen",
552        "ren",
553        "rentals",
554        "repair",
555        "report",
556        "republican",
557        "rest",
558        "restaurant",
559        "reviews",
560        "rich",
561        "rio",
562        "rip",
563        "rocks",
564        "rodeo",
565        "rsvp",
566        "ruhr",
567        "ryukyu",
568        "saarland",
569        "sarl",
570        "sca",
571        "scb",
572        "schmidt",
573        "schule",
574        "scot",
575        "services",
576        "sexy",
577        "shiksha",
578        "shoes",
579        "singles",
580        "social",
581        "software",
582        "sohu",
583        "solar",
584        "solutions",
585        "soy",
586        "space",
587        "spiegel",
588        "supplies",
589        "supply",
590        "support",
591        "surf",
592        "surgery",
593        "suzuki",
594        "systems",
595        "tatar",
596        "tattoo",
597        "tax",
598        "technology",
599        "tel",
600        "tienda",
601        "tips",
602        "tirol",
603        "today",
604        "tokyo",
605        "tools",
606        "top",
607        "town",
608        "toys",
609        "trade",
610        "training",
611        "travel",
612        "tui",
613        "university",
614        "uno",
615        "uol",
616        "vacations",
617        "vegas",
618        "ventures",
619        "versicherung",
620        "vet",
621        "viajes",
622        "villas",
623        "vision",
624        "vlaanderen",
625        "vodka",
626        "vote",
627        "voting",
628        "voto",
629        "voyage",
630        "wales",
631        "wang",
632        "watch",
633        "webcam",
634        "website",
635        "wed",
636        "wedding",
637        "whoswho",
638        "wien",
639        "wiki",
640        "williamhill",
641        "wme",
642        "work",
643        "works",
644        "world",
645        "wtc",
646        "wtf",
647        "xxx",
648        "xyz",
649        "yachts",
650        "yandex",
651        "yoga",
652        "yokohama",
653        "youtube",
654        "zip",
655        "zone",
656    };
657
658    // JOSM PATCH BEGIN
659    // see #10862 - list of IDN TLDs taken from IANA on 2014-12-18
660    private static final String[] IDN_TLDS = new String[] {
661        "XN--1QQW23A",
662        "XN--3BST00M",
663        "XN--3DS443G",
664        "XN--3E0B707E",
665        "XN--45BRJ9C",
666        "XN--45Q11C",
667        "XN--4GBRIM",
668        "XN--55QW42G",
669        "XN--55QX5D",
670        "XN--6FRZ82G",
671        "XN--6QQ986B3XL",
672        "XN--80ADXHKS",
673        "XN--80AO21A",
674        "XN--80ASEHDB",
675        "XN--80ASWG",
676        "XN--90A3AC",
677        "XN--C1AVG",
678        "XN--CG4BKI",
679        "XN--CLCHC0EA0B2G2A9GCD",
680        "XN--CZR694B",
681        "XN--CZRS0T",
682        "XN--CZRU2D",
683        "XN--D1ACJ3B",
684        "XN--D1ALF",
685        "XN--FIQ228C5HS",
686        "XN--FIQ64B",
687        "XN--FIQS8S",
688        "XN--FIQZ9S",
689        "XN--FLW351E",
690        "XN--FPCRJ9C3D",
691        "XN--FZC2C9E2C",
692        "XN--GECRJ9C",
693        "XN--H2BRJ9C",
694        "XN--HXT814E",
695        "XN--I1B6B1A6A2E",
696        "XN--IO0A7I",
697        "XN--J1AMH",
698        "XN--J6W193G",
699        "XN--KPRW13D",
700        "XN--KPRY57D",
701        "XN--KPUT3I",
702        "XN--L1ACC",
703        "XN--LGBBAT1AD8J",
704        "XN--MGB9AWBF",
705        "XN--MGBA3A4F16A",
706        "XN--MGBAAM7A8H",
707        "XN--MGBAB2BD",
708        "XN--MGBAYH7GPA",
709        "XN--MGBBH1A71E",
710        "XN--MGBC0A9AZCG",
711        "XN--MGBERP4A5D4AR",
712        "XN--MGBX4CD0AB",
713        "XN--NGBC5AZD",
714        "XN--NODE",
715        "XN--NQV7F",
716        "XN--NQV7FS00EMA",
717        "XN--O3CW4H",
718        "XN--OGBPF8FL",
719        "XN--P1ACF",
720        "XN--P1AI",
721        "XN--PGBS0DH",
722        "XN--Q9JYB4C",
723        "XN--QCKA1PMC",
724        "XN--RHQV96G",
725        "XN--S9BRJ9C",
726        "XN--SES554G",
727        "XN--UNUP4Y",
728        "XN--VERMGENSBERATER-CTB",
729        "XN--VERMGENSBERATUNG-PWB",
730        "XN--VHQUV",
731        "XN--WGBH1C",
732        "XN--WGBL6A",
733        "XN--XHQ521B",
734        "XN--XKC2AL3HYE2A",
735        "XN--XKC2DL3A5EE0H",
736        "XN--YFRO4I67O",
737        "XN--YGBI2AMMX",
738        "XN--ZFR164B",
739    };
740    // END JOSM PATCH
741
742    private static final String[] COUNTRY_CODE_TLDS = new String[] {
743        "ac",                 // Ascension Island
744        "ad",                 // Andorra
745        "ae",                 // United Arab Emirates
746        "af",                 // Afghanistan
747        "ag",                 // Antigua and Barbuda
748        "ai",                 // Anguilla
749        "al",                 // Albania
750        "am",                 // Armenia
751        "an",                 // Netherlands Antilles
752        "ao",                 // Angola
753        "aq",                 // Antarctica
754        "ar",                 // Argentina
755        "as",                 // American Samoa
756        "at",                 // Austria
757        "au",                 // Australia (includes Ashmore and Cartier Islands and Coral Sea Islands)
758        "aw",                 // Aruba
759        "ax",                 // Åland
760        "az",                 // Azerbaijan
761        "ba",                 // Bosnia and Herzegovina
762        "bb",                 // Barbados
763        "bd",                 // Bangladesh
764        "be",                 // Belgium
765        "bf",                 // Burkina Faso
766        "bg",                 // Bulgaria
767        "bh",                 // Bahrain
768        "bi",                 // Burundi
769        "bj",                 // Benin
770        "bm",                 // Bermuda
771        "bn",                 // Brunei Darussalam
772        "bo",                 // Bolivia
773        "br",                 // Brazil
774        "bs",                 // Bahamas
775        "bt",                 // Bhutan
776        "bv",                 // Bouvet Island
777        "bw",                 // Botswana
778        "by",                 // Belarus
779        "bz",                 // Belize
780        "ca",                 // Canada
781        "cc",                 // Cocos (Keeling) Islands
782        "cd",                 // Democratic Republic of the Congo (formerly Zaire)
783        "cf",                 // Central African Republic
784        "cg",                 // Republic of the Congo
785        "ch",                 // Switzerland
786        "ci",                 // Côte d'Ivoire
787        "ck",                 // Cook Islands
788        "cl",                 // Chile
789        "cm",                 // Cameroon
790        "cn",                 // China, mainland
791        "co",                 // Colombia
792        "cr",                 // Costa Rica
793        "cu",                 // Cuba
794        "cv",                 // Cape Verde
795        "cw",                 // Curaçao
796        "cx",                 // Christmas Island
797        "cy",                 // Cyprus
798        "cz",                 // Czech Republic
799        "de",                 // Germany
800        "dj",                 // Djibouti
801        "dk",                 // Denmark
802        "dm",                 // Dominica
803        "do",                 // Dominican Republic
804        "dz",                 // Algeria
805        "ec",                 // Ecuador
806        "ee",                 // Estonia
807        "eg",                 // Egypt
808        "er",                 // Eritrea
809        "es",                 // Spain
810        "et",                 // Ethiopia
811        "eu",                 // European Union
812        "fi",                 // Finland
813        "fj",                 // Fiji
814        "fk",                 // Falkland Islands
815        "fm",                 // Federated States of Micronesia
816        "fo",                 // Faroe Islands
817        "fr",                 // France
818        "ga",                 // Gabon
819        "gb",                 // Great Britain (United Kingdom)
820        "gd",                 // Grenada
821        "ge",                 // Georgia
822        "gf",                 // French Guiana
823        "gg",                 // Guernsey
824        "gh",                 // Ghana
825        "gi",                 // Gibraltar
826        "gl",                 // Greenland
827        "gm",                 // The Gambia
828        "gn",                 // Guinea
829        "gp",                 // Guadeloupe
830        "gq",                 // Equatorial Guinea
831        "gr",                 // Greece
832        "gs",                 // South Georgia and the South Sandwich Islands
833        "gt",                 // Guatemala
834        "gu",                 // Guam
835        "gw",                 // Guinea-Bissau
836        "gy",                 // Guyana
837        "hk",                 // Hong Kong
838        "hm",                 // Heard Island and McDonald Islands
839        "hn",                 // Honduras
840        "hr",                 // Croatia (Hrvatska)
841        "ht",                 // Haiti
842        "hu",                 // Hungary
843        "id",                 // Indonesia
844        "ie",                 // Ireland (Éire)
845        "il",                 // Israel
846        "im",                 // Isle of Man
847        "in",                 // India
848        "io",                 // British Indian Ocean Territory
849        "iq",                 // Iraq
850        "ir",                 // Iran
851        "is",                 // Iceland
852        "it",                 // Italy
853        "je",                 // Jersey
854        "jm",                 // Jamaica
855        "jo",                 // Jordan
856        "jp",                 // Japan
857        "ke",                 // Kenya
858        "kg",                 // Kyrgyzstan
859        "kh",                 // Cambodia (Khmer)
860        "ki",                 // Kiribati
861        "km",                 // Comoros
862        "kn",                 // Saint Kitts and Nevis
863        "kp",                 // North Korea
864        "kr",                 // South Korea
865        "kw",                 // Kuwait
866        "ky",                 // Cayman Islands
867        "kz",                 // Kazakhstan
868        "la",                 // Laos (currently being marketed as the official domain for Los Angeles)
869        "lb",                 // Lebanon
870        "lc",                 // Saint Lucia
871        "li",                 // Liechtenstein
872        "lk",                 // Sri Lanka
873        "lr",                 // Liberia
874        "ls",                 // Lesotho
875        "lt",                 // Lithuania
876        "lu",                 // Luxembourg
877        "lv",                 // Latvia
878        "ly",                 // Libya
879        "ma",                 // Morocco
880        "mc",                 // Monaco
881        "md",                 // Moldova
882        "me",                 // Montenegro
883        "mg",                 // Madagascar
884        "mh",                 // Marshall Islands
885        "mk",                 // Republic of Macedonia
886        "ml",                 // Mali
887        "mm",                 // Myanmar
888        "mn",                 // Mongolia
889        "mo",                 // Macau
890        "mp",                 // Northern Mariana Islands
891        "mq",                 // Martinique
892        "mr",                 // Mauritania
893        "ms",                 // Montserrat
894        "mt",                 // Malta
895        "mu",                 // Mauritius
896        "mv",                 // Maldives
897        "mw",                 // Malawi
898        "mx",                 // Mexico
899        "my",                 // Malaysia
900        "mz",                 // Mozambique
901        "na",                 // Namibia
902        "nc",                 // New Caledonia
903        "ne",                 // Niger
904        "nf",                 // Norfolk Island
905        "ng",                 // Nigeria
906        "ni",                 // Nicaragua
907        "nl",                 // Netherlands
908        "no",                 // Norway
909        "np",                 // Nepal
910        "nr",                 // Nauru
911        "nu",                 // Niue
912        "nz",                 // New Zealand
913        "om",                 // Oman
914        "pa",                 // Panama
915        "pe",                 // Peru
916        "pf",                 // French Polynesia With Clipperton Island
917        "pg",                 // Papua New Guinea
918        "ph",                 // Philippines
919        "pk",                 // Pakistan
920        "pl",                 // Poland
921        "pm",                 // Saint-Pierre and Miquelon
922        "pn",                 // Pitcairn Islands
923        "pr",                 // Puerto Rico
924        "ps",                 // Palestinian territories (PA-controlled West Bank and Gaza Strip)
925        "pt",                 // Portugal
926        "pw",                 // Palau
927        "py",                 // Paraguay
928        "qa",                 // Qatar
929        "re",                 // Réunion
930        "ro",                 // Romania
931        "rs",                 // Serbia
932        "ru",                 // Russia
933        "rw",                 // Rwanda
934        "sa",                 // Saudi Arabia
935        "sb",                 // Solomon Islands
936        "sc",                 // Seychelles
937        "sd",                 // Sudan
938        "se",                 // Sweden
939        "sg",                 // Singapore
940        "sh",                 // Saint Helena
941        "si",                 // Slovenia
942        "sj",                 // Svalbard and Jan Mayen Islands Not in use (Norwegian dependencies; see .no)
943        "sk",                 // Slovakia
944        "sl",                 // Sierra Leone
945        "sm",                 // San Marino
946        "sn",                 // Senegal
947        "so",                 // Somalia
948        "sr",                 // Suriname
949        "st",                 // São Tomé and Príncipe
950        "su",                 // Soviet Union (deprecated)
951        "sv",                 // El Salvador
952        "sx",                 // Sint Maarten
953        "sy",                 // Syria
954        "sz",                 // Swaziland
955        "tc",                 // Turks and Caicos Islands
956        "td",                 // Chad
957        "tf",                 // French Southern and Antarctic Lands
958        "tg",                 // Togo
959        "th",                 // Thailand
960        "tj",                 // Tajikistan
961        "tk",                 // Tokelau
962        "tl",                 // East Timor (deprecated old code)
963        "tm",                 // Turkmenistan
964        "tn",                 // Tunisia
965        "to",                 // Tonga
966        "tp",                 // East Timor
967        "tr",                 // Turkey
968        "tt",                 // Trinidad and Tobago
969        "tv",                 // Tuvalu
970        "tw",                 // Taiwan, Republic of China
971        "tz",                 // Tanzania
972        "ua",                 // Ukraine
973        "ug",                 // Uganda
974        "uk",                 // United Kingdom
975        "um",                 // United States Minor Outlying Islands
976        "us",                 // United States of America
977        "uy",                 // Uruguay
978        "uz",                 // Uzbekistan
979        "va",                 // Vatican City State
980        "vc",                 // Saint Vincent and the Grenadines
981        "ve",                 // Venezuela
982        "vg",                 // British Virgin Islands
983        "vi",                 // U.S. Virgin Islands
984        "vn",                 // Vietnam
985        "vu",                 // Vanuatu
986        "wf",                 // Wallis and Futuna
987        "ws",                 // Samoa (formerly Western Samoa)
988        "ye",                 // Yemen
989        "yt",                 // Mayotte
990        "yu",                 // Serbia and Montenegro (originally Yugoslavia)
991        "za",                 // South Africa
992        "zm",                 // Zambia
993        "zw",                 // Zimbabwe
994    };
995
996    private static final String[] LOCAL_TLDS = new String[] {
997       "localhost",           // RFC2606 defined
998       "localdomain"          // Also widely used as localhost.localdomain
999   };
1000
1001    static {
1002        Arrays.sort(INFRASTRUCTURE_TLDS);
1003        Arrays.sort(COUNTRY_CODE_TLDS);
1004        Arrays.sort(GENERIC_TLDS);
1005        Arrays.sort(IDN_TLDS);
1006        Arrays.sort(LOCAL_TLDS);
1007    }
1008}