c# - How to measure similarity of 2 strings aside from computing distance -


i creating program checks if word simplified word(txt, msg, etc.) , if simplified finds correct spelling txt=text, msg=message. iam using nhunspell suggest method in c# suggest possible results.

the problem if inputted "txt" result text,tat, tot, etc. dont know how select correct word. used levenshtein distance (c# - compare string similarity) results still results 1.

input: txt result: text = 1, ext = 1 tit = 1

can me how meaning or correct spelling of simplified words? example: msg

i have tested input sample data , text has distance of 25 whereas other have distance of 33. here's code:

string input = "txt"; string[] words = new[]{"text","tat","tot"}; var levenshtein = new levenshtein();   const int maxdistance = 30;  var distancegroups = words         .select(w => new         {             word = w,             distance = levenshtein.ild(w.toupperinvariant(), input)         })         .where(x => x.distance <= maxdistance)         .groupby(x => x.distance)         .orderby(g => g.key)         .tolist(); foreach (var topcandidate in distancegroups.first())     console.writeline("word:{0} distance:{1}", topcandidate.word, topcandidate.distance); 

and here levenshtein class:

public class levenshtein {     ///*****************************     /// compute levenshtein distance      /// memory efficient version     ///*****************************     public int ild(string srow, string scol)     {         int rowlen = srow.length;  // length of srow         int collen = scol.length;  // length of scol         int rowidx;                // iterates through srow         int colidx;                // iterates through scol         char row_i;                // ith character of srow         char col_j;                // jth character of scol         int cost;                   // cost          /// test string length         if (math.max(srow.length, scol.length) > math.pow(2, 31))             throw (new exception("\nmaximum string length in levenshtein.ild " + math.pow(2, 31) + ".\nyours " + math.max(srow.length, scol.length) + "."));          // step 1          if (rowlen == 0)         {             return collen;         }          if (collen == 0)         {             return rowlen;         }          /// create 2 vectors         int[] v0 = new int[rowlen + 1];         int[] v1 = new int[rowlen + 1];         int[] vtmp;            /// step 2         /// initialize first vector         (rowidx = 1; rowidx <= rowlen; rowidx++)         {             v0[rowidx] = rowidx;         }          // step 3          /// fore each column         (colidx = 1; colidx <= collen; colidx++)         {             /// set 0'th element column number             v1[0] = colidx;              col_j = scol[colidx - 1];               // step 4              /// fore each row             (rowidx = 1; rowidx <= rowlen; rowidx++)             {                 row_i = srow[rowidx - 1];                   // step 5                  if (row_i == col_j)                 {                     cost = 0;                 }                 else                 {                     cost = 1;                 }                  // step 6                  /// find minimum                 int m_min = v0[rowidx] + 1;                 int b = v1[rowidx - 1] + 1;                 int c = v0[rowidx - 1] + cost;                  if (b < m_min)                 {                     m_min = b;                 }                 if (c < m_min)                 {                     m_min = c;                 }                  v1[rowidx] = m_min;             }              /// swap vectors             vtmp = v0;             v0 = v1;             v1 = vtmp;          }          // step 7          /// value between 0 - 100         /// 0==perfect match 100==totaly different         ///          /// vectors swaped 1 last time @ end of last loop,         /// why result in v0 rather in v1         //system.console.writeline("idist=" + v0[rowlen]);         int max = system.math.max(rowlen, collen);         return ((100 * v0[rowlen]) / max);     }       ///*****************************     /// compute min     ///*****************************      private int minimum(int a, int b, int c)     {         int mi = a;          if (b < mi)         {             mi = b;         }         if (c < mi)         {             mi = c;         }          return mi;     }      ///*****************************     /// compute levenshtein distance              ///*****************************     public int ld(string snew, string sold)     {         int[,] matrix;              // matrix         int snewlen = snew.length;  // length of snew         int soldlen = sold.length;  // length of sold         int snewidx; // iterates through snew         int soldidx; // iterates through sold         char snew_i; // ith character of snew         char sold_j; // jth character of sold         int cost; // cost          /// test string length         if (math.max(snew.length, sold.length) > math.pow(2, 31))             throw (new exception("\nmaximum string length in levenshtein.ld " + math.pow(2, 31) + ".\nyours " + math.max(snew.length, sold.length) + "."));          // step 1          if (snewlen == 0)         {             return soldlen;         }          if (soldlen == 0)         {             return snewlen;         }          matrix = new int[snewlen + 1, soldlen + 1];          // step 2          (snewidx = 0; snewidx <= snewlen; snewidx++)         {             matrix[snewidx, 0] = snewidx;         }          (soldidx = 0; soldidx <= soldlen; soldidx++)         {             matrix[0, soldidx] = soldidx;         }          // step 3          (snewidx = 1; snewidx <= snewlen; snewidx++)         {             snew_i = snew[snewidx - 1];              // step 4              (soldidx = 1; soldidx <= soldlen; soldidx++)             {                 sold_j = sold[soldidx - 1];                  // step 5                  if (snew_i == sold_j)                 {                     cost = 0;                 }                 else                 {                     cost = 1;                 }                  // step 6                  matrix[snewidx, soldidx] = minimum(matrix[snewidx - 1, soldidx] + 1, matrix[snewidx, soldidx - 1] + 1, matrix[snewidx - 1, soldidx - 1] + cost);              }         }          // step 7          /// value between 0 - 100         /// 0==perfect match 100==totaly different         //system.console.writeline("dist=" + matrix[snewlen, soldlen]);         int max = system.math.max(snewlen, soldlen);         return (100 * matrix[snewlen, soldlen]) / max;     } } 

Comments