Monday, June 26, 2017

Get rid of extra double quotes in the text column of a flat file

start = 0;
// Fix extra " chars inside a quoted text column, like ["some text " with quote in the middle"].
//
// Walks every double quote in `line` and removes the ones that are neither the
// first/last character of the line nor adjacent to the column delimiter.
// Sets isAnyDoubleQuotes when at least one quote was removed, and justStop when
// a quote sitting next to the delimiter was seen and left in place.
//
// NOTE(review): the comparisons below use fixed 2- and 3-character windows, so
// they only match when `delimiter` is a single character — confirm with caller.
while ((pos = line.IndexOf('"', start)) != -1)
{
   // The very first and very last characters are never touched.
   if ((pos != 0) && (pos != line.Length - 1))
   {
      // pos is not the last index here, so a 2-char window starting at pos is in range.
      if (line.Substring(pos, 2) == "\"" + delimiter)  // like "|
      {
         justStop = true; // closing quote of a column: keep it
      }
      // pos is not 0 here, so a 2-char window starting at pos - 1 is in range.
      else if (line.Substring(pos - 1, 2) == delimiter + "\"")  // like |"
      {
         justStop = true; // opening quote of a column: keep it
      }
      // Two double quotes in a row in the middle of the text
      else if (line.Substring(pos, 2) == "\"\"")  // like ""
      {
         isAnyDoubleQuotes = true;

         // Need to check that it is NOT ""| — dropping both there would leave
         // the text column without its closing quote.
         if (pos < line.Length - 2) // safe to read 3 chars, like ""|CRLF
         {
            if (line.Substring(pos, 3) == "\"\"" + delimiter)
            {
               // Remove JUST ONE quote; the survivor now sits at pos as the
               // closing quote and is stepped over by the advance below.
               line = line.Remove(pos, 1);
            }
            else
            {
               // Not before the delimiter: remove BOTH quotes.
               line = line.Remove(pos, 2);

               // Whatever followed the pair has slid into pos and may itself be
               // a quote (e.g. """ or """"), so scan again from pos instead of
               // stepping over it. The line got shorter, so this terminates.
               start = pos;
               continue;
            }
         }
         else
         {
            // It is "" at the very end of the line: drop just one, the other
            // becomes the last character and is left alone.
            line = line.Remove(pos, 1);
         }
      }
      // Just a single double quote in the middle of the text
      else
      {
         line = line.Remove(pos, 1);  // the char now at pos is known not to be a quote
         isAnyDoubleQuotes = true;
      }
   }

   start = pos + 1;
}

No comments:

Post a Comment