001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.openstreetmap.josm.data.validation.routines;
018
019import java.util.Arrays;
020import java.util.Locale;
021
022/**
023 * <p><b>Domain name</b> validation routines.</p>
024 *
025 * <p>
026 * This validator provides methods for validating Internet domain names
027 * and top-level domains.
028 * </p>
029 *
030 * <p>Domain names are evaluated according
031 * to the standards <a href="http://www.ietf.org/rfc/rfc1034.txt">RFC1034</a>,
032 * section 3, and <a href="http://www.ietf.org/rfc/rfc1123.txt">RFC1123</a>,
033 * section 2.1. No accomodation is provided for the specialized needs of
034 * other applications; if the domain name has been URL-encoded, for example,
035 * validation will fail even though the equivalent plaintext version of the
036 * same name would have passed.
037 * </p>
038 *
039 * <p>
040 * Validation is also provided for top-level domains (TLDs) as defined and
041 * maintained by the Internet Assigned Numbers Authority (IANA):
042 * </p>
043 *
044 *   <ul>
045 *     <li>{@link #isValidInfrastructureTld} - validates infrastructure TLDs
046 *         (<code>.arpa</code>, etc.)</li>
047 *     <li>{@link #isValidGenericTld} - validates generic TLDs
048 *         (<code>.com, .org</code>, etc.)</li>
049 *     <li>{@link #isValidIdnTld} - validates IDN TLDs
050 *         (<code>.xn--*</code>, etc.)</li>
051 *     <li>{@link #isValidCountryCodeTld} - validates country code TLDs
052 *         (<code>.us, .uk, .cn</code>, etc.)</li>
053 *   </ul>
054 *
055 * <p>
056 * (<b>NOTE</b>: This class does not provide IP address lookup for domain names or
057 * methods to ensure that a given domain name matches a specific IP; see
058 * {@link java.net.InetAddress} for that functionality.)
059 * </p>
060 *
061 * @version $Revision: 1640271 $ $Date: 2014-11-18 02:32:15 2014 UTC (Tue, 18 Nov 2014) $
062 * @since Validator 1.4
063 */
064public final class DomainValidator extends AbstractValidator {
065
066    // Regular expression strings for hostnames (derived from RFC2396 and RFC 1123)
067    private static final String DOMAIN_LABEL_REGEX = "\\p{Alnum}(?>[\\p{Alnum}-]*\\p{Alnum})*";
068    private static final String TOP_LABEL_REGEX = "\\p{Alpha}{2,}";
069    // JOSM PATCH BEGIN
070    // See #10862 - IDN TLDs in ASCII form
071    private static final String TOP_LABEL_IDN_REGEX = "(?:xn|XN)--\\p{Alnum}{2,}(?:-\\p{Alpha}{2,})?";
072    private static final String DOMAIN_NAME_REGEX =
073            "^(?:" + DOMAIN_LABEL_REGEX + "\\.)+" + "(" + TOP_LABEL_REGEX + "|" + TOP_LABEL_IDN_REGEX + ")$";
074    // JOSM PATCH END
075
076    private final boolean allowLocal;
077
078    /**
079     * Singleton instance of this validator, which
080     *  doesn't consider local addresses as valid.
081     */
082    private static final DomainValidator DOMAIN_VALIDATOR = new DomainValidator(false);
083
084    /**
085     * Singleton instance of this validator, which does
086     *  consider local addresses valid.
087     */
088    private static final DomainValidator DOMAIN_VALIDATOR_WITH_LOCAL = new DomainValidator(true);
089
090    /**
091     * RegexValidator for matching domains.
092     */
093    private final RegexValidator domainRegex =
094            new RegexValidator(DOMAIN_NAME_REGEX);
095    /**
096     * RegexValidator for matching the a local hostname
097     */
098    private final RegexValidator hostnameRegex =
099            new RegexValidator(DOMAIN_LABEL_REGEX);
100
101    /**
102     * Returns the singleton instance of this validator. It
103     *  will not consider local addresses as valid.
104     * @return the singleton instance of this validator
105     */
106    public static DomainValidator getInstance() {
107        return DOMAIN_VALIDATOR;
108    }
109
110    /**
111     * Returns the singleton instance of this validator,
112     *  with local validation as required.
113     * @param allowLocal Should local addresses be considered valid?
114     * @return the singleton instance of this validator
115     */
116    public static DomainValidator getInstance(boolean allowLocal) {
117       if (allowLocal) {
118          return DOMAIN_VALIDATOR_WITH_LOCAL;
119       }
120       return DOMAIN_VALIDATOR;
121    }
122
123    /** Private constructor. */
124    private DomainValidator(boolean allowLocal) {
125       this.allowLocal = allowLocal;
126    }
127
128    /**
129     * Returns true if the specified <code>String</code> parses
130     * as a valid domain name with a recognized top-level domain.
131     * The parsing is case-sensitive.
132     * @param domain the parameter to check for domain name syntax
133     * @return true if the parameter is a valid domain name
134     */
135    @Override
136    public boolean isValid(String domain) {
137        String[] groups = domainRegex.match(domain);
138        if (groups != null && groups.length > 0) {
139            return isValidTld(groups[0]);
140        } else if (allowLocal) {
141            if (hostnameRegex.isValid(domain)) {
142               return true;
143            }
144        }
145        return false;
146    }
147
148    /**
149     * Returns true if the specified <code>String</code> matches any
150     * IANA-defined top-level domain. Leading dots are ignored if present.
151     * The search is case-sensitive.
152     * @param tld the parameter to check for TLD status
153     * @return true if the parameter is a TLD
154     */
155    public boolean isValidTld(String tld) {
156        if (allowLocal && isValidLocalTld(tld)) {
157           return true;
158        }
159        return isValidInfrastructureTld(tld)
160                || isValidGenericTld(tld)
161                || isValidIdnTld(tld)
162                || isValidCountryCodeTld(tld);
163    }
164
165    /**
166     * Returns true if the specified <code>String</code> matches any
167     * IANA-defined infrastructure top-level domain. Leading dots are
168     * ignored if present. The search is case-sensitive.
169     * @param iTld the parameter to check for infrastructure TLD status
170     * @return true if the parameter is an infrastructure TLD
171     */
172    public boolean isValidInfrastructureTld(String iTld) {
173        return Arrays.binarySearch(INFRASTRUCTURE_TLDS, chompLeadingDot(iTld.toLowerCase(Locale.ENGLISH))) >= 0;
174    }
175
176    /**
177     * Returns true if the specified <code>String</code> matches any
178     * IANA-defined generic top-level domain. Leading dots are ignored
179     * if present. The search is case-sensitive.
180     * @param gTld the parameter to check for generic TLD status
181     * @return true if the parameter is a generic TLD
182     */
183    public boolean isValidGenericTld(String gTld) {
184        return Arrays.binarySearch(GENERIC_TLDS, chompLeadingDot(gTld.toLowerCase(Locale.ENGLISH))) >= 0;
185    }
186
187    /**
188     * Returns true if the specified <code>String</code> matches any
189     * IANA-defined IDN top-level domain. Leading dots are ignored
190     * if present. The search is case-sensitive.
191     * @param iTld the parameter to check for IDN TLD status
192     * @return true if the parameter is an IDN TLD
193     */
194    public boolean isValidIdnTld(String iTld) {
195        return Arrays.binarySearch(IDN_TLDS, chompLeadingDot(iTld.toUpperCase(Locale.ENGLISH))) >= 0;
196    }
197
198    /**
199     * Returns true if the specified <code>String</code> matches any
200     * IANA-defined country code top-level domain. Leading dots are
201     * ignored if present. The search is case-sensitive.
202     * @param ccTld the parameter to check for country code TLD status
203     * @return true if the parameter is a country code TLD
204     */
205    public boolean isValidCountryCodeTld(String ccTld) {
206        return Arrays.binarySearch(COUNTRY_CODE_TLDS, chompLeadingDot(ccTld.toLowerCase(Locale.ENGLISH))) >= 0;
207    }
208
209    /**
210     * Returns true if the specified <code>String</code> matches any
211     * widely used "local" domains (localhost or localdomain). Leading dots are
212     *  ignored if present. The search is case-sensitive.
213     * @param iTld the parameter to check for local TLD status
214     * @return true if the parameter is an local TLD
215     */
216    public boolean isValidLocalTld(String iTld) {
217        return Arrays.binarySearch(LOCAL_TLDS, chompLeadingDot(iTld.toLowerCase(Locale.ENGLISH))) >= 0;
218    }
219
220    private static String chompLeadingDot(String str) {
221        if (str.startsWith(".")) {
222            return str.substring(1);
223        } else {
224            return str;
225        }
226    }
227
228    // ---------------------------------------------
229    // ----- TLDs defined by IANA
230    // ----- Authoritative and comprehensive list at:
231    // ----- http://data.iana.org/TLD/tlds-alpha-by-domain.txt
232
233    private static final String[] INFRASTRUCTURE_TLDS = new String[] {
234        "arpa",               // internet infrastructure
235        "root"                // diagnostic marker for non-truncated root zone
236    };
237
238    private static final String[] GENERIC_TLDS = new String[] {
239        "abogado",
240        "academy",
241        "accountants",
242        "active",
243        "actor",
244        "aero",
245        "agency",
246        "airforce",
247        "allfinanz",
248        "alsace",
249        "archi",
250        "army",
251        "arpa",
252        "asia",
253        "associates",
254        "attorney",
255        "auction",
256        "audio",
257        "autos",
258        "axa",
259        "band",
260        "bar",
261        "bargains",
262        "bayern",
263        "beer",
264        "berlin",
265        "best",
266        "bid",
267        "bike",
268        "bio",
269        "biz",
270        "black",
271        "blackfriday",
272        "blue",
273        "bmw",
274        "bnpparibas",
275        "boo",
276        "boutique",
277        "brussels",
278        "budapest",
279        "build",
280        "builders",
281        "business",
282        "buzz",
283        "bzh",
284        "cab",
285        "cal",
286        "camera",
287        "camp",
288        "cancerresearch",
289        "capetown",
290        "capital",
291        "caravan",
292        "cards",
293        "care",
294        "career",
295        "careers",
296        "casa",
297        "cash",
298        "cat",
299        "catering",
300        "center",
301        "ceo",
302        "cern",
303        "channel",
304        "cheap",
305        "christmas",
306        "chrome",
307        "church",
308        "citic",
309        "city",
310        "claims",
311        "cleaning",
312        "click",
313        "clinic",
314        "clothing",
315        "club",
316        "codes",
317        "coffee",
318        "college",
319        "cologne",
320        "com",
321        "community",
322        "company",
323        "computer",
324        "condos",
325        "construction",
326        "consulting",
327        "contractors",
328        "cooking",
329        "cool",
330        "coop",
331        "country",
332        "credit",
333        "creditcard",
334        "crs",
335        "cruises",
336        "cuisinella",
337        "cymru",
338        "dad",
339        "dance",
340        "dating",
341        "day",
342        "deals",
343        "degree",
344        "democrat",
345        "dental",
346        "dentist",
347        "desi",
348        "diamonds",
349        "diet",
350        "digital",
351        "direct",
352        "directory",
353        "discount",
354        "dnp",
355        "domains",
356        "durban",
357        "dvag",
358        "eat",
359        "edu",
360        "education",
361        "email",
362        "engineer",
363        "engineering",
364        "enterprises",
365        "equipment",
366        "esq",
367        "estate",
368        "eus",
369        "events",
370        "exchange",
371        "expert",
372        "exposed",
373        "fail",
374        "farm",
375        "feedback",
376        "finance",
377        "financial",
378        "fish",
379        "fishing",
380        "fitness",
381        "flights",
382        "florist",
383        "flsmidth",
384        "fly",
385        "foo",
386        "forsale",
387        "foundation",
388        "frl",
389        "frogans",
390        "fund",
391        "furniture",
392        "futbol",
393        "gal",
394        "gallery",
395        "gbiz",
396        "gent",
397        "gift",
398        "gifts",
399        "gives",
400        "glass",
401        "gle",
402        "global",
403        "globo",
404        "gmail",
405        "gmo",
406        "gmx",
407        "google",
408        "gop",
409        "gov",
410        "graphics",
411        "gratis",
412        "green",
413        "gripe",
414        "guide",
415        "guitars",
416        "guru",
417        "hamburg",
418        "haus",
419        "healthcare",
420        "help",
421        "here",
422        "hiphop",
423        "hiv",
424        "holdings",
425        "holiday",
426        "homes",
427        "horse",
428        "host",
429        "hosting",
430        "house",
431        "how",
432        "ibm",
433        "immo",
434        "immobilien",
435        "industries",
436        "info",
437        "ing",
438        "ink",
439        "institute",
440        "insure",
441        "int",
442        "international",
443        "investments",
444        "jetzt",
445        "jobs",
446        "joburg",
447        "juegos",
448        "kaufen",
449        "kim",
450        "kitchen",
451        "kiwi",
452        "koeln",
453        "krd",
454        "kred",
455        "lacaixa",
456        "land",
457        "lawyer",
458        "lease",
459        "lgbt",
460        "life",
461        "lighting",
462        "limited",
463        "limo",
464        "link",
465        "loans",
466        "london",
467        "lotto",
468        "ltda",
469        "luxe",
470        "luxury",
471        "maison",
472        "management",
473        "mango",
474        "market",
475        "marketing",
476        "media",
477        "meet",
478        "melbourne",
479        "meme",
480        "menu",
481        "miami",
482        "mil",
483        "mini",
484        "mobi",
485        "moda",
486        "moe",
487        "monash",
488        "mortgage",
489        "moscow",
490        "motorcycles",
491        "mov",
492        "museum",
493        "nagoya",
494        "name",
495        "navy",
496        "net",
497        "network",
498        "neustar",
499        "new",
500        "nexus",
501        "ngo",
502        "nhk",
503        "ninja",
504        "nra",
505        "nrw",
506        "nyc",
507        "okinawa",
508        "ong",
509        "onl",
510        "ooo",
511        "org",
512        "organic",
513        "otsuka",
514        "ovh",
515        "paris",
516        "partners",
517        "parts",
518        "pharmacy",
519        "photo",
520        "photography",
521        "photos",
522        "physio",
523        "pics",
524        "pictures",
525        "pink",
526        "pizza",
527        "place",
528        "plumbing",
529        "pohl",
530        "poker",
531        "post",
532        "praxi",
533        "press",
534        "pro",
535        "prod",
536        "productions",
537        "prof",
538        "properties",
539        "property",
540        "pub",
541        "qpon",
542        "quebec",
543        "realtor",
544        "recipes",
545        "red",
546        "rehab",
547        "reise",
548        "reisen",
549        "ren",
550        "rentals",
551        "repair",
552        "report",
553        "republican",
554        "rest",
555        "restaurant",
556        "reviews",
557        "rich",
558        "rio",
559        "rip",
560        "rocks",
561        "rodeo",
562        "rsvp",
563        "ruhr",
564        "ryukyu",
565        "saarland",
566        "sarl",
567        "sca",
568        "scb",
569        "schmidt",
570        "schule",
571        "scot",
572        "services",
573        "sexy",
574        "shiksha",
575        "shoes",
576        "singles",
577        "social",
578        "software",
579        "sohu",
580        "solar",
581        "solutions",
582        "soy",
583        "space",
584        "spiegel",
585        "supplies",
586        "supply",
587        "support",
588        "surf",
589        "surgery",
590        "suzuki",
591        "systems",
592        "tatar",
593        "tattoo",
594        "tax",
595        "technology",
596        "tel",
597        "tienda",
598        "tips",
599        "tirol",
600        "today",
601        "tokyo",
602        "tools",
603        "top",
604        "town",
605        "toys",
606        "trade",
607        "training",
608        "travel",
609        "tui",
610        "university",
611        "uno",
612        "uol",
613        "vacations",
614        "vegas",
615        "ventures",
616        "versicherung",
617        "vet",
618        "viajes",
619        "villas",
620        "vision",
621        "vlaanderen",
622        "vodka",
623        "vote",
624        "voting",
625        "voto",
626        "voyage",
627        "wales",
628        "wang",
629        "watch",
630        "webcam",
631        "website",
632        "wed",
633        "wedding",
634        "whoswho",
635        "wien",
636        "wiki",
637        "williamhill",
638        "wme",
639        "work",
640        "works",
641        "world",
642        "wtc",
643        "wtf",
644        "xxx",
645        "xyz",
646        "yachts",
647        "yandex",
648        "yoga",
649        "yokohama",
650        "youtube",
651        "zip",
652        "zone",
653    };
654
655    // JOSM PATCH BEGIN
656    // see #10862 - list of IDN TLDs taken from IANA on 2014-12-18
657    private static final String[] IDN_TLDS = new String[] {
658        "XN--1QQW23A",
659        "XN--3BST00M",
660        "XN--3DS443G",
661        "XN--3E0B707E",
662        "XN--45BRJ9C",
663        "XN--45Q11C",
664        "XN--4GBRIM",
665        "XN--55QW42G",
666        "XN--55QX5D",
667        "XN--6FRZ82G",
668        "XN--6QQ986B3XL",
669        "XN--80ADXHKS",
670        "XN--80AO21A",
671        "XN--80ASEHDB",
672        "XN--80ASWG",
673        "XN--90A3AC",
674        "XN--C1AVG",
675        "XN--CG4BKI",
676        "XN--CLCHC0EA0B2G2A9GCD",
677        "XN--CZR694B",
678        "XN--CZRS0T",
679        "XN--CZRU2D",
680        "XN--D1ACJ3B",
681        "XN--D1ALF",
682        "XN--FIQ228C5HS",
683        "XN--FIQ64B",
684        "XN--FIQS8S",
685        "XN--FIQZ9S",
686        "XN--FLW351E",
687        "XN--FPCRJ9C3D",
688        "XN--FZC2C9E2C",
689        "XN--GECRJ9C",
690        "XN--H2BRJ9C",
691        "XN--HXT814E",
692        "XN--I1B6B1A6A2E",
693        "XN--IO0A7I",
694        "XN--J1AMH",
695        "XN--J6W193G",
696        "XN--KPRW13D",
697        "XN--KPRY57D",
698        "XN--KPUT3I",
699        "XN--L1ACC",
700        "XN--LGBBAT1AD8J",
701        "XN--MGB9AWBF",
702        "XN--MGBA3A4F16A",
703        "XN--MGBAAM7A8H",
704        "XN--MGBAB2BD",
705        "XN--MGBAYH7GPA",
706        "XN--MGBBH1A71E",
707        "XN--MGBC0A9AZCG",
708        "XN--MGBERP4A5D4AR",
709        "XN--MGBX4CD0AB",
710        "XN--NGBC5AZD",
711        "XN--NODE",
712        "XN--NQV7F",
713        "XN--NQV7FS00EMA",
714        "XN--O3CW4H",
715        "XN--OGBPF8FL",
716        "XN--P1ACF",
717        "XN--P1AI",
718        "XN--PGBS0DH",
719        "XN--Q9JYB4C",
720        "XN--QCKA1PMC",
721        "XN--RHQV96G",
722        "XN--S9BRJ9C",
723        "XN--SES554G",
724        "XN--UNUP4Y",
725        "XN--VERMGENSBERATER-CTB",
726        "XN--VERMGENSBERATUNG-PWB",
727        "XN--VHQUV",
728        "XN--WGBH1C",
729        "XN--WGBL6A",
730        "XN--XHQ521B",
731        "XN--XKC2AL3HYE2A",
732        "XN--XKC2DL3A5EE0H",
733        "XN--YFRO4I67O",
734        "XN--YGBI2AMMX",
735        "XN--ZFR164B",
736    };
737    // END JOSM PATCH
738
739    private static final String[] COUNTRY_CODE_TLDS = new String[] {
740        "ac",                 // Ascension Island
741        "ad",                 // Andorra
742        "ae",                 // United Arab Emirates
743        "af",                 // Afghanistan
744        "ag",                 // Antigua and Barbuda
745        "ai",                 // Anguilla
746        "al",                 // Albania
747        "am",                 // Armenia
748        "an",                 // Netherlands Antilles
749        "ao",                 // Angola
750        "aq",                 // Antarctica
751        "ar",                 // Argentina
752        "as",                 // American Samoa
753        "at",                 // Austria
754        "au",                 // Australia (includes Ashmore and Cartier Islands and Coral Sea Islands)
755        "aw",                 // Aruba
756        "ax",                 // Åland
757        "az",                 // Azerbaijan
758        "ba",                 // Bosnia and Herzegovina
759        "bb",                 // Barbados
760        "bd",                 // Bangladesh
761        "be",                 // Belgium
762        "bf",                 // Burkina Faso
763        "bg",                 // Bulgaria
764        "bh",                 // Bahrain
765        "bi",                 // Burundi
766        "bj",                 // Benin
767        "bm",                 // Bermuda
768        "bn",                 // Brunei Darussalam
769        "bo",                 // Bolivia
770        "br",                 // Brazil
771        "bs",                 // Bahamas
772        "bt",                 // Bhutan
773        "bv",                 // Bouvet Island
774        "bw",                 // Botswana
775        "by",                 // Belarus
776        "bz",                 // Belize
777        "ca",                 // Canada
778        "cc",                 // Cocos (Keeling) Islands
779        "cd",                 // Democratic Republic of the Congo (formerly Zaire)
780        "cf",                 // Central African Republic
781        "cg",                 // Republic of the Congo
782        "ch",                 // Switzerland
783        "ci",                 // Côte d'Ivoire
784        "ck",                 // Cook Islands
785        "cl",                 // Chile
786        "cm",                 // Cameroon
787        "cn",                 // China, mainland
788        "co",                 // Colombia
789        "cr",                 // Costa Rica
790        "cu",                 // Cuba
791        "cv",                 // Cape Verde
792        "cw",                 // Curaçao
793        "cx",                 // Christmas Island
794        "cy",                 // Cyprus
795        "cz",                 // Czech Republic
796        "de",                 // Germany
797        "dj",                 // Djibouti
798        "dk",                 // Denmark
799        "dm",                 // Dominica
800        "do",                 // Dominican Republic
801        "dz",                 // Algeria
802        "ec",                 // Ecuador
803        "ee",                 // Estonia
804        "eg",                 // Egypt
805        "er",                 // Eritrea
806        "es",                 // Spain
807        "et",                 // Ethiopia
808        "eu",                 // European Union
809        "fi",                 // Finland
810        "fj",                 // Fiji
811        "fk",                 // Falkland Islands
812        "fm",                 // Federated States of Micronesia
813        "fo",                 // Faroe Islands
814        "fr",                 // France
815        "ga",                 // Gabon
816        "gb",                 // Great Britain (United Kingdom)
817        "gd",                 // Grenada
818        "ge",                 // Georgia
819        "gf",                 // French Guiana
820        "gg",                 // Guernsey
821        "gh",                 // Ghana
822        "gi",                 // Gibraltar
823        "gl",                 // Greenland
824        "gm",                 // The Gambia
825        "gn",                 // Guinea
826        "gp",                 // Guadeloupe
827        "gq",                 // Equatorial Guinea
828        "gr",                 // Greece
829        "gs",                 // South Georgia and the South Sandwich Islands
830        "gt",                 // Guatemala
831        "gu",                 // Guam
832        "gw",                 // Guinea-Bissau
833        "gy",                 // Guyana
834        "hk",                 // Hong Kong
835        "hm",                 // Heard Island and McDonald Islands
836        "hn",                 // Honduras
837        "hr",                 // Croatia (Hrvatska)
838        "ht",                 // Haiti
839        "hu",                 // Hungary
840        "id",                 // Indonesia
841        "ie",                 // Ireland (Éire)
842        "il",                 // Israel
843        "im",                 // Isle of Man
844        "in",                 // India
845        "io",                 // British Indian Ocean Territory
846        "iq",                 // Iraq
847        "ir",                 // Iran
848        "is",                 // Iceland
849        "it",                 // Italy
850        "je",                 // Jersey
851        "jm",                 // Jamaica
852        "jo",                 // Jordan
853        "jp",                 // Japan
854        "ke",                 // Kenya
855        "kg",                 // Kyrgyzstan
856        "kh",                 // Cambodia (Khmer)
857        "ki",                 // Kiribati
858        "km",                 // Comoros
859        "kn",                 // Saint Kitts and Nevis
860        "kp",                 // North Korea
861        "kr",                 // South Korea
862        "kw",                 // Kuwait
863        "ky",                 // Cayman Islands
864        "kz",                 // Kazakhstan
865        "la",                 // Laos (currently being marketed as the official domain for Los Angeles)
866        "lb",                 // Lebanon
867        "lc",                 // Saint Lucia
868        "li",                 // Liechtenstein
869        "lk",                 // Sri Lanka
870        "lr",                 // Liberia
871        "ls",                 // Lesotho
872        "lt",                 // Lithuania
873        "lu",                 // Luxembourg
874        "lv",                 // Latvia
875        "ly",                 // Libya
876        "ma",                 // Morocco
877        "mc",                 // Monaco
878        "md",                 // Moldova
879        "me",                 // Montenegro
880        "mg",                 // Madagascar
881        "mh",                 // Marshall Islands
882        "mk",                 // Republic of Macedonia
883        "ml",                 // Mali
884        "mm",                 // Myanmar
885        "mn",                 // Mongolia
886        "mo",                 // Macau
887        "mp",                 // Northern Mariana Islands
888        "mq",                 // Martinique
889        "mr",                 // Mauritania
890        "ms",                 // Montserrat
891        "mt",                 // Malta
892        "mu",                 // Mauritius
893        "mv",                 // Maldives
894        "mw",                 // Malawi
895        "mx",                 // Mexico
896        "my",                 // Malaysia
897        "mz",                 // Mozambique
898        "na",                 // Namibia
899        "nc",                 // New Caledonia
900        "ne",                 // Niger
901        "nf",                 // Norfolk Island
902        "ng",                 // Nigeria
903        "ni",                 // Nicaragua
904        "nl",                 // Netherlands
905        "no",                 // Norway
906        "np",                 // Nepal
907        "nr",                 // Nauru
908        "nu",                 // Niue
909        "nz",                 // New Zealand
910        "om",                 // Oman
911        "pa",                 // Panama
912        "pe",                 // Peru
913        "pf",                 // French Polynesia With Clipperton Island
914        "pg",                 // Papua New Guinea
915        "ph",                 // Philippines
916        "pk",                 // Pakistan
917        "pl",                 // Poland
918        "pm",                 // Saint-Pierre and Miquelon
919        "pn",                 // Pitcairn Islands
920        "pr",                 // Puerto Rico
921        "ps",                 // Palestinian territories (PA-controlled West Bank and Gaza Strip)
922        "pt",                 // Portugal
923        "pw",                 // Palau
924        "py",                 // Paraguay
925        "qa",                 // Qatar
926        "re",                 // Réunion
927        "ro",                 // Romania
928        "rs",                 // Serbia
929        "ru",                 // Russia
930        "rw",                 // Rwanda
931        "sa",                 // Saudi Arabia
932        "sb",                 // Solomon Islands
933        "sc",                 // Seychelles
934        "sd",                 // Sudan
935        "se",                 // Sweden
936        "sg",                 // Singapore
937        "sh",                 // Saint Helena
938        "si",                 // Slovenia
939        "sj",                 // Svalbard and Jan Mayen Islands Not in use (Norwegian dependencies; see .no)
940        "sk",                 // Slovakia
941        "sl",                 // Sierra Leone
942        "sm",                 // San Marino
943        "sn",                 // Senegal
944        "so",                 // Somalia
945        "sr",                 // Suriname
946        "st",                 // São Tomé and Príncipe
947        "su",                 // Soviet Union (deprecated)
948        "sv",                 // El Salvador
949        "sx",                 // Sint Maarten
950        "sy",                 // Syria
951        "sz",                 // Swaziland
952        "tc",                 // Turks and Caicos Islands
953        "td",                 // Chad
954        "tf",                 // French Southern and Antarctic Lands
955        "tg",                 // Togo
956        "th",                 // Thailand
957        "tj",                 // Tajikistan
958        "tk",                 // Tokelau
959        "tl",                 // East Timor (deprecated old code)
960        "tm",                 // Turkmenistan
961        "tn",                 // Tunisia
962        "to",                 // Tonga
963        "tp",                 // East Timor
964        "tr",                 // Turkey
965        "tt",                 // Trinidad and Tobago
966        "tv",                 // Tuvalu
967        "tw",                 // Taiwan, Republic of China
968        "tz",                 // Tanzania
969        "ua",                 // Ukraine
970        "ug",                 // Uganda
971        "uk",                 // United Kingdom
972        "um",                 // United States Minor Outlying Islands
973        "us",                 // United States of America
974        "uy",                 // Uruguay
975        "uz",                 // Uzbekistan
976        "va",                 // Vatican City State
977        "vc",                 // Saint Vincent and the Grenadines
978        "ve",                 // Venezuela
979        "vg",                 // British Virgin Islands
980        "vi",                 // U.S. Virgin Islands
981        "vn",                 // Vietnam
982        "vu",                 // Vanuatu
983        "wf",                 // Wallis and Futuna
984        "ws",                 // Samoa (formerly Western Samoa)
985        "ye",                 // Yemen
986        "yt",                 // Mayotte
987        "yu",                 // Serbia and Montenegro (originally Yugoslavia)
988        "za",                 // South Africa
989        "zm",                 // Zambia
990        "zw",                 // Zimbabwe
991    };
992
993    private static final String[] LOCAL_TLDS = new String[] {
994       "localhost",           // RFC2606 defined
995       "localdomain"          // Also widely used as localhost.localdomain
996   };
997
998    static {
999        Arrays.sort(INFRASTRUCTURE_TLDS);
1000        Arrays.sort(COUNTRY_CODE_TLDS);
1001        Arrays.sort(GENERIC_TLDS);
1002        Arrays.sort(IDN_TLDS);
1003        Arrays.sort(LOCAL_TLDS);
1004    }
1005}