November 18, 2008

Parsing CSV files that have embedded commas

Use a regular expression to parse each line of a CSV file so that quoted fields may contain embedded commas:


' Read the CSV line by line and split each line into its fields. A field is
' either a double-quoted string (which may contain commas and doubled quotes)
' or a run of characters up to the next comma.
While Not sr.EndOfStream
    ' The (?<=^|,) look-behind ties each field to the start of the line or to
    ' the preceding comma without consuming that comma. A consuming prefix
    ' would skip the field that follows an empty leading field (e.g. ",a").
    ' Quotes inside a VB string literal are written doubled, not backslash-escaped.
    Dim matches As MatchCollection = Regex.Matches(sr.ReadLine(), "(?<=^|,)(""(?:[^""]+|"""")*""|[^,]*)")
    For Each match As Match In matches
        ' Group 1 is the field text; a quoted field keeps its enclosing quotes.
        Dim sItem As String = match.Groups(1).Value
    Next
End While


// Read the CSV line by line and split each line into its fields. A field is
// either a double-quoted string (which may contain commas and doubled quotes)
// or a run of characters up to the next comma.
while (!sr.EndOfStream){
  // The (?<=^|,) look-behind ties each field to the start of the line or to
  // the preceding comma without consuming that comma. A consuming prefix
  // would skip the field that follows an empty leading field (e.g. ",a").
  MatchCollection matches = Regex.Matches(sr.ReadLine(), "(?<=^|,)(\"(?:[^\"]+|\"\")*\"|[^,]*)");
  foreach(Match match in matches){
    // Group 1 is the field text; a quoted field keeps its enclosing quotes.
    string sItem = match.Groups[1].Value;
  }
}

