Comparing strings using UCA (depending on Locale)
author gu.martinm@gmail.com <gu.martinm@gmail.com>
Mon, 16 Jun 2014 00:57:48 +0000 (02:57 +0200)
committer gu.martinm@gmail.com <gu.martinm@gmail.com>
Mon, 16 Jun 2014 00:57:48 +0000 (02:57 +0200)
Allgemeines/UNICODECollation/src/de/example/collation/CollationExample.java

index 40feeee..189550c 100644 (file)
@@ -16,7 +16,7 @@ public class CollationExample {
         final List<String> wordsListES = new ArrayList<String>(Arrays.asList(words));
         final List<String> wordsListFR = new ArrayList<String>(Arrays.asList(words));
 
-        final Collator es_ESCollator = Collator.getInstance(new Locale("es","ES"));
+        Collator es_ESCollator = Collator.getInstance(new Locale("es", "ES"));
         final Collator fr_FRCollator = Collator.getInstance(Locale.FRANCE);
 
         // String.compareTo and String.compareToIgnoreCase are using UNICODE
@@ -25,12 +25,47 @@ public class CollationExample {
         //test.compareToIgnoreCase("gus");
 
         es_ESCollator.setStrength(Collator.TERTIARY);
+        es_ESCollator.setDecomposition(Collator.FULL_DECOMPOSITION);
         Collections.sort(wordsListES, es_ESCollator);
         fr_FRCollator.setStrength(Collator.TERTIARY);
+        fr_FRCollator.setDecomposition(Collator.FULL_DECOMPOSITION);
         Collections.sort(wordsListFR, fr_FRCollator);
 
         System.out.println("Words list ES: " + wordsListES);
         System.out.println("Words list FR: " + wordsListFR);
+
+        // Shouldn't it be the same word in German?
+        // CHECK MONO RESULTS, IT IS NOT THE SAME!!!! O.o
+        // Why do Mono and Java give me different results? WTF!!! :(
+        System.out.println("strasse");
+        Collator de_DECollator = Collator.getInstance(new Locale("de", "DE"));
+        de_DECollator.setStrength(Collator.TERTIARY);
+        de_DECollator.setDecomposition(Collator.FULL_DECOMPOSITION);
+        int result = de_DECollator.compare("strasse", "straße");
+        System.out.println("German result: " + result);
+        es_ESCollator = Collator.getInstance(new Locale("es", "ES"));
+        es_ESCollator.setStrength(Collator.TERTIARY);
+        es_ESCollator.setDecomposition(Collator.FULL_DECOMPOSITION);
+        // They are not the same word in either German or Spanish. I do not
+        // understand collations :(
+        result = es_ESCollator.compare("strasse", "straße");
+        System.out.println("Spanish result: " + result);
+
+        // Shouldn't it be the same word in German?
+        // IN THIS CASE I GET THE SAME RESULTS USING MONO :)
+        System.out.println("koennen");
+        de_DECollator = Collator.getInstance(new Locale("de", "DE"));
+        de_DECollator.setStrength(Collator.TERTIARY);
+        de_DECollator.setDecomposition(Collator.FULL_DECOMPOSITION);
+        result = de_DECollator.compare("können", "koennen");
+        System.out.println("German result: " + result);
+        es_ESCollator = Collator.getInstance(new Locale("es", "ES"));
+        es_ESCollator.setStrength(Collator.TERTIARY);
+        es_ESCollator.setDecomposition(Collator.FULL_DECOMPOSITION);
+        // They are not the same word in either German or Spanish. I do not
+        // understand collations :(
+        result = es_ESCollator.compare("können", "koennen");
+        System.out.println("Spanish result: " + result);
     }
 
 }