NullifyNetwork

The blog and home page of Simon Soanes
Skip to content
[ Log On ]

I'm sure there's an easier way to do this, but I've been maintaining my own class to do the job for a while now and keep needing to find it in my code library every time I use it - I figure it might be useful to others:- 

using System;
using System.Collections.Generic;
using System.Text;
using System.Data;

namespace CSVParser
{
    /// <summary>
    /// Simon's stock CSV parser class
    /// </summary>
    public class CSVParser
    {
        /// <summary>
        /// Takes the CSV files contents and creates a data table from it
        /// </summary>
        /// <param name="csvFileContents">The entire contents of a CSV file split by line (rather than the filename)</param>
        /// <param name="validateLayout">Validate the file to check conformance of the layout with expected standards</param>
        /// <param name="topRowIsHeader">The top row is the header row, take it as the column names rather than data</param>
        /// <returns>A datatable</returns>
        public static DataTable DataTableFromCSV(string[] csvFileContents, bool validateLayout, bool topRowIsHeader)
        {
            DataTable outputDataTable = new DataTable();
            List<string[]> csvFileRows = new List<string[]>();
            List<string> columns = new List<string>();

            #region Pre-parse the file
            int columnCount = 0;

            bool gotHeaders = false;
            int rowNumber = 0;

            foreach (string line in csvFileContents)
            {
                string[] parts = ExtractCSVElements(line);

                //initial set of header names but only if the top row is header option is set
                if (!gotHeaders && topRowIsHeader)
                {
                    columns.AddRange(parts);
                    columnCount = parts.Length;
                    gotHeaders = true;
                }
                else
                {
                    if (parts.Length > 0)
                    {
                        csvFileRows.Add(parts);
                    }
                }

                if (parts.Length > columnCount)
                {
                    //if set to validate the layout and that the first row contains the headers then we know any extra columns are wrong
                    if (validateLayout && gotHeaders)
                    {
                        throw new Exception("Row has extra data columns: " + rowNumber.ToString());
                    }

                    //new column detected mid-data-set!
                    for (int i = columnCount; i < parts.Length; i++)
                    {
                        columns.Add("Column " + i.ToString());
                    }

                    columnCount = parts.Length;
                }

                //we always ignore zero length rows as the last line can be empty
                if (parts.Length < columnCount && parts.Length != 0)
                {
                    if (validateLayout)
                    {
                        throw new Exception("Row has missing data columns: " + rowNumber.ToString());
                    }
                }


                rowNumber++;
            }

            #endregion

            #region Build the data tables layout and data

            //columns
            foreach (string column in columns)
            {
                outputDataTable.Columns.Add(column);
            }

            //rows
            foreach (string[] row in csvFileRows)
            {
                outputDataTable.Rows.Add(row);
            }
            #endregion

            return outputDataTable;
        }

        /// <summary>
        /// Extract the elements of a line from a CSV file with support for quotes
        /// </summary>
        /// <param name="line">The data to parse</param>
        private static string[] ExtractCSVElements(string line)
        {
            List<string> elements = new List<string>();

            //do the initial split, based on commas
            string[] firstParts = line.Split(',');

            //reparse it
            StringBuilder temporaryPart = new StringBuilder("");
            bool inside = false;
            foreach (string part in firstParts)
            {
                //are we inside a quoted part, or did we just find a quote?
                if (!inside //we're not inside
                && (!part.Contains("\"") //and we don't contain a quote
                 || ( //or we're handling a single quote enclosed element
                        part.StartsWith("\"") //we start with a quote
                        && part.EndsWith("\"") //and we end with a quote)
                    )
                )
                {
                    elements.Add(part);
                }
                else
                {
                    if (inside)
                    {
                        //we are still inside a quote...
                        temporaryPart.Append("," + part);

                        if (part.Contains("\""))
                        {
                            //then we are also at the end!
                            elements.Add(temporaryPart.Replace("\"", "").ToString()); //add the part minus its quotes to the array
                            //all done!
                            inside = false;
                        }
                    }
                    else
                    {
                        //else we just found a quote!
                        inside = true;
                        temporaryPart = new StringBuilder(part);
                    }
                }
            }

            return elements.ToArray();
        }
    }
}

 

Permalink