org.apache.commons.codec.language
Class DoubleMetaphone

java.lang.Object
  extended by org.apache.commons.codec.language.DoubleMetaphone
All Implemented Interfaces:
Encoder, StringEncoder

public class DoubleMetaphone
extends Object
implements StringEncoder

Encodes a string into a double metaphone value. This implementation is based on the algorithm by Lawrence Philips.

Version:
$Id: DoubleMetaphone.java,v 1.24 2004/06/05 18:32:04 ggregory Exp $
Author:
Apache Software Foundation

Nested Class Summary
 class DoubleMetaphone.DoubleMetaphoneResult
          Inner class for storing results, since there is the optional alternate encoding.
 
Field Summary
private static String[] ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER
           
private static String[] L_R_N_M_B_H_F_V_W_SPACE
           
private static String[] L_T_K_S_N_M_B_Z
           
protected  int maxCodeLen
          Maximum length of an encoding, default is 4
private static String[] SILENT_START
          Prefixes which, when present, are not pronounced
private static String VOWELS
          "Vowels" to test for
 
Constructor Summary
DoubleMetaphone()
          Creates an instance of this DoubleMetaphone encoder
 
Method Summary
protected  char charAt(String value, int index)
          Gets the character at index index if available; otherwise returns Character.MIN_VALUE so that there is always a default value
private  String cleanInput(String input)
          Cleans the input
private  boolean conditionC0(String value, int index)
          Complex condition 0 for 'C'
private  boolean conditionCH0(String value, int index)
          Complex condition 0 for 'CH'
private  boolean conditionCH1(String value, int index)
          Complex condition 1 for 'CH'
private  boolean conditionL0(String value, int index)
          Complex condition 0 for 'L'
private  boolean conditionM0(String value, int index)
          Complex condition 0 for 'M'
private static boolean contains(String value, int start, int length, String criteria)
          Shortcut method with 1 criteria
protected static boolean contains(String value, int start, int length, String[] criteria)
          Determines whether value contains any of the criteria starting at index start and matching up to length length
private static boolean contains(String value, int start, int length, String criteria1, String criteria2)
          Shortcut method with 2 criteria
private static boolean contains(String value, int start, int length, String criteria1, String criteria2, String criteria3)
          Shortcut method with 3 criteria
private static boolean contains(String value, int start, int length, String criteria1, String criteria2, String criteria3, String criteria4)
          Shortcut method with 4 criteria
private static boolean contains(String value, int start, int length, String criteria1, String criteria2, String criteria3, String criteria4, String criteria5)
          Shortcut method with 5 criteria
private static boolean contains(String value, int start, int length, String criteria1, String criteria2, String criteria3, String criteria4, String criteria5, String criteria6)
          Shortcut method with 6 criteria
 String doubleMetaphone(String value)
          Encode a value with Double Metaphone
 String doubleMetaphone(String value, boolean alternate)
          Encode a value with Double Metaphone, optionally using the alternate encoding.
 Object encode(Object obj)
          Encode the value using DoubleMetaphone.
 String encode(String value)
          Encode the value using DoubleMetaphone.
 int getMaxCodeLen()
          Returns the maxCodeLen.
private  int handleAEIOUY(String value, DoubleMetaphone.DoubleMetaphoneResult result, int index)
          Handles 'A', 'E', 'I', 'O', 'U', and 'Y' cases
private  int handleC(String value, DoubleMetaphone.DoubleMetaphoneResult result, int index)
          Handles 'C' cases
private  int handleCC(String value, DoubleMetaphone.DoubleMetaphoneResult result, int index)
          Handles 'CC' cases
private  int handleCH(String value, DoubleMetaphone.DoubleMetaphoneResult result, int index)
          Handles 'CH' cases
private  int handleD(String value, DoubleMetaphone.DoubleMetaphoneResult result, int index)
          Handles 'D' cases
private  int handleG(String value, DoubleMetaphone.DoubleMetaphoneResult result, int index, boolean slavoGermanic)
          Handles 'G' cases
private  int handleGH(String value, DoubleMetaphone.DoubleMetaphoneResult result, int index)
          Handles 'GH' cases
private  int handleH(String value, DoubleMetaphone.DoubleMetaphoneResult result, int index)
          Handles 'H' cases
private  int handleJ(String value, DoubleMetaphone.DoubleMetaphoneResult result, int index, boolean slavoGermanic)
          Handles 'J' cases
private  int handleL(String value, DoubleMetaphone.DoubleMetaphoneResult result, int index)
          Handles 'L' cases
private  int handleP(String value, DoubleMetaphone.DoubleMetaphoneResult result, int index)
          Handles 'P' cases
private  int handleR(String value, DoubleMetaphone.DoubleMetaphoneResult result, int index, boolean slavoGermanic)
          Handles 'R' cases
private  int handleS(String value, DoubleMetaphone.DoubleMetaphoneResult result, int index, boolean slavoGermanic)
          Handles 'S' cases
private  int handleSC(String value, DoubleMetaphone.DoubleMetaphoneResult result, int index)
          Handles 'SC' cases
private  int handleT(String value, DoubleMetaphone.DoubleMetaphoneResult result, int index)
          Handles 'T' cases
private  int handleW(String value, DoubleMetaphone.DoubleMetaphoneResult result, int index)
          Handles 'W' cases
private  int handleX(String value, DoubleMetaphone.DoubleMetaphoneResult result, int index)
          Handles 'X' cases
private  int handleZ(String value, DoubleMetaphone.DoubleMetaphoneResult result, int index, boolean slavoGermanic)
          Handles 'Z' cases
 boolean isDoubleMetaphoneEqual(String value1, String value2)
          Check if the Double Metaphone values of two String values are equal.
 boolean isDoubleMetaphoneEqual(String value1, String value2, boolean alternate)
          Check if the Double Metaphone values of two String values are equal, optionally using the alternate value.
private  boolean isSilentStart(String value)
          Determines whether or not the value starts with a silent letter.
private  boolean isSlavoGermanic(String value)
          Determines whether or not a value is of slavo-germanic origin.
private  boolean isVowel(char ch)
          Determines whether or not a character is a vowel
 void setMaxCodeLen(int maxCodeLen)
          Sets the maxCodeLen.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

VOWELS

private static final String VOWELS
"Vowels" to test for

See Also:
Constant Field Values

SILENT_START

private static final String[] SILENT_START
Prefixes which, when present, are not pronounced


L_R_N_M_B_H_F_V_W_SPACE

private static final String[] L_R_N_M_B_H_F_V_W_SPACE

ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER

private static final String[] ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER

L_T_K_S_N_M_B_Z

private static final String[] L_T_K_S_N_M_B_Z

maxCodeLen

protected int maxCodeLen
Maximum length of an encoding, default is 4

Constructor Detail

DoubleMetaphone

public DoubleMetaphone()
Creates an instance of this DoubleMetaphone encoder

Method Detail

doubleMetaphone

public String doubleMetaphone(String value)
Encode a value with Double Metaphone

Parameters:
value - String to encode
Returns:
an encoded string

doubleMetaphone

public String doubleMetaphone(String value,
                              boolean alternate)
Encode a value with Double Metaphone, optionally using the alternate encoding.

Parameters:
value - String to encode
alternate - use the alternate encoding if true
Returns:
an encoded string

encode

public Object encode(Object obj)
              throws EncoderException
Encode the value using DoubleMetaphone. It will only work if obj is a String (like Metaphone).

Specified by:
encode in interface Encoder
Parameters:
obj - Object to encode (should be of type String)
Returns:
An encoded Object (will be of type String)
Throws:
EncoderException - encode parameter is not of type String

encode

public String encode(String value)
Encode the value using DoubleMetaphone.

Specified by:
encode in interface StringEncoder
Parameters:
value - String to encode
Returns:
An encoded String

isDoubleMetaphoneEqual

public boolean isDoubleMetaphoneEqual(String value1,
                                      String value2)
Check if the Double Metaphone values of two String values are equal.

Parameters:
value1 - The left-hand side of the encoded String.equals(Object).
value2 - The right-hand side of the encoded String.equals(Object).
Returns:
true if the encoded Strings are equal; false otherwise.
See Also:
isDoubleMetaphoneEqual(String,String,boolean)

isDoubleMetaphoneEqual

public boolean isDoubleMetaphoneEqual(String value1,
                                      String value2,
                                      boolean alternate)
Check if the Double Metaphone values of two String values are equal, optionally using the alternate value.

Parameters:
value1 - The left-hand side of the encoded String.equals(Object).
value2 - The right-hand side of the encoded String.equals(Object).
alternate - use the alternate value if true.
Returns:
true if the encoded Strings are equal; false otherwise.

getMaxCodeLen

public int getMaxCodeLen()
Returns the maxCodeLen.

Returns:
int

setMaxCodeLen

public void setMaxCodeLen(int maxCodeLen)
Sets the maxCodeLen.

Parameters:
maxCodeLen - The maxCodeLen to set

handleAEIOUY

private int handleAEIOUY(String value,
                         DoubleMetaphone.DoubleMetaphoneResult result,
                         int index)
Handles 'A', 'E', 'I', 'O', 'U', and 'Y' cases


handleC

private int handleC(String value,
                    DoubleMetaphone.DoubleMetaphoneResult result,
                    int index)
Handles 'C' cases


handleCC

private int handleCC(String value,
                     DoubleMetaphone.DoubleMetaphoneResult result,
                     int index)
Handles 'CC' cases


handleCH

private int handleCH(String value,
                     DoubleMetaphone.DoubleMetaphoneResult result,
                     int index)
Handles 'CH' cases


handleD

private int handleD(String value,
                    DoubleMetaphone.DoubleMetaphoneResult result,
                    int index)
Handles 'D' cases


handleG

private int handleG(String value,
                    DoubleMetaphone.DoubleMetaphoneResult result,
                    int index,
                    boolean slavoGermanic)
Handles 'G' cases


handleGH

private int handleGH(String value,
                     DoubleMetaphone.DoubleMetaphoneResult result,
                     int index)
Handles 'GH' cases


handleH

private int handleH(String value,
                    DoubleMetaphone.DoubleMetaphoneResult result,
                    int index)
Handles 'H' cases


handleJ

private int handleJ(String value,
                    DoubleMetaphone.DoubleMetaphoneResult result,
                    int index,
                    boolean slavoGermanic)
Handles 'J' cases


handleL

private int handleL(String value,
                    DoubleMetaphone.DoubleMetaphoneResult result,
                    int index)
Handles 'L' cases


handleP

private int handleP(String value,
                    DoubleMetaphone.DoubleMetaphoneResult result,
                    int index)
Handles 'P' cases


handleR

private int handleR(String value,
                    DoubleMetaphone.DoubleMetaphoneResult result,
                    int index,
                    boolean slavoGermanic)
Handles 'R' cases


handleS

private int handleS(String value,
                    DoubleMetaphone.DoubleMetaphoneResult result,
                    int index,
                    boolean slavoGermanic)
Handles 'S' cases


handleSC

private int handleSC(String value,
                     DoubleMetaphone.DoubleMetaphoneResult result,
                     int index)
Handles 'SC' cases


handleT

private int handleT(String value,
                    DoubleMetaphone.DoubleMetaphoneResult result,
                    int index)
Handles 'T' cases


handleW

private int handleW(String value,
                    DoubleMetaphone.DoubleMetaphoneResult result,
                    int index)
Handles 'W' cases


handleX

private int handleX(String value,
                    DoubleMetaphone.DoubleMetaphoneResult result,
                    int index)
Handles 'X' cases


handleZ

private int handleZ(String value,
                    DoubleMetaphone.DoubleMetaphoneResult result,
                    int index,
                    boolean slavoGermanic)
Handles 'Z' cases


conditionC0

private boolean conditionC0(String value,
                            int index)
Complex condition 0 for 'C'


conditionCH0

private boolean conditionCH0(String value,
                             int index)
Complex condition 0 for 'CH'


conditionCH1

private boolean conditionCH1(String value,
                             int index)
Complex condition 1 for 'CH'


conditionL0

private boolean conditionL0(String value,
                            int index)
Complex condition 0 for 'L'


conditionM0

private boolean conditionM0(String value,
                            int index)
Complex condition 0 for 'M'


isSlavoGermanic

private boolean isSlavoGermanic(String value)
Determines whether or not a value is of slavo-germanic origin. A value is of slavo-germanic origin if it contains any of 'W', 'K', 'CZ', or 'WITZ'.


isVowel

private boolean isVowel(char ch)
Determines whether or not a character is a vowel


isSilentStart

private boolean isSilentStart(String value)
Determines whether or not the value starts with a silent letter. It will return true if the value starts with any of 'GN', 'KN', 'PN', 'WR' or 'PS'.


cleanInput

private String cleanInput(String input)
Cleans the input


charAt

protected char charAt(String value,
                      int index)
Gets the character at index index if available; otherwise returns Character.MIN_VALUE so that there is always a default value


contains

private static boolean contains(String value,
                                int start,
                                int length,
                                String criteria)
Shortcut method with 1 criteria


contains

private static boolean contains(String value,
                                int start,
                                int length,
                                String criteria1,
                                String criteria2)
Shortcut method with 2 criteria


contains

private static boolean contains(String value,
                                int start,
                                int length,
                                String criteria1,
                                String criteria2,
                                String criteria3)
Shortcut method with 3 criteria


contains

private static boolean contains(String value,
                                int start,
                                int length,
                                String criteria1,
                                String criteria2,
                                String criteria3,
                                String criteria4)
Shortcut method with 4 criteria


contains

private static boolean contains(String value,
                                int start,
                                int length,
                                String criteria1,
                                String criteria2,
                                String criteria3,
                                String criteria4,
                                String criteria5)
Shortcut method with 5 criteria


contains

private static boolean contains(String value,
                                int start,
                                int length,
                                String criteria1,
                                String criteria2,
                                String criteria3,
                                String criteria4,
                                String criteria5,
                                String criteria6)
Shortcut method with 6 criteria


contains

protected static boolean contains(String value,
                                  int start,
                                  int length,
                                  String[] criteria)
Determines whether value contains any of the criteria starting at index start and matching up to length length



commons-codec version 1.3 - Copyright © 2002-2004 - Apache Software Foundation