A comparison of two ways to remove HTML tags from a string and truncate the result: a regex replacement versus a character-by-character parser built on memory streams.
Compiled regex is faster.
/// <summary>
/// Console benchmark that times two HTML-summary routines on the same input:
/// the regex-based <see cref="Do.DisplaySearchSummary"/> and the stream-based
/// <see cref="Do.Truncate"/>.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        // NOTE(review): with a single repeat each measurement presumably also includes
        // one-time start-up cost (JIT, regex compilation); raise this for steadier numbers.
        int repeats = 1;
        Regex regex = new Regex(@"<(.|\n)*?>", RegexOptions.Compiled);
        var htmlStr = @" <div id='lipsum'> <p> </p><ul> <li>Lorem ipsum dolor sit amet, consectetur adipiscing elit.</li> <li>Vestibulum tincidunt eros ac velit scelerisque pharetra.</li> </ul> <p></p> <p> </p><ul> <li>Aenean pellentesque mauris et massa eleifend tristique.</li> <li>In rutrum magna at arcu molestie porta.</li> <li>Pellentesque rutrum nibh non est auctor varius.</li> </ul> <p></p> <p> </p><ul> <li>Vestibulum elementum ante blandit risus cursus convallis.</li> <li>In id magna lacinia, luctus sapien in, tempor elit.</li> </ul> <p></p> <p> </p><ul> <li>Integer tempus tellus nec purus ultrices, quis facilisis magna adipiscing.</li> <li>Cras convallis sapien vel augue ultrices pulvinar.</li> <li>Sed pretium eros vel tellus feugiat, ut congue erat pellentesque.</li> <li>Vestibulum convallis tortor congue sapien condimentum, sit amet vestibulum nisl tristique.</li> <li>Cras non lacus sagittis, auctor massa eget, pulvinar elit.</li> <li>Morbi eu augue tincidunt, luctus lacus vestibulum, varius elit.</li> </ul> <p></p> <p> </p><ul> <li>Cras at dui sed justo convallis mattis.</li> <li>Curabitur molestie mi nec dui interdum aliquet.</li> <li>Maecenas malesuada magna non aliquam sollicitudin.</li> <li>Maecenas porta erat quis turpis dictum faucibus.</li> <li>Duis dignissim elit a ultrices tristique.</li> </ul> <p></p></div>";

        // Regex-based summary.
        Console.WriteLine("Inhouse");
        var timer1 = Stopwatch.StartNew();
        for (int i = 0; i < repeats; i++)
        {
            Do.DisplaySearchSummary(htmlStr, 150, regex);
        }
        timer1.Stop();
        Console.WriteLine("Inhouse: " + timer1.ElapsedTicks);

        // Stream-based truncation.
        Console.WriteLine("umbraco");
        var timer2 = Stopwatch.StartNew();
        for (int i = 0; i < repeats; i++)
        {
            Do.Truncate(htmlStr, 150, true, false);
        }
        timer2.Stop();
        Console.WriteLine("Umbraco: " + timer2.ElapsedTicks);

        // Keep the console window open until a key is pressed.
        Console.ReadKey();
    }
}

/// <summary>
/// HTML stripping and truncation helpers compared by the benchmark.
/// </summary>
public class Do
{
    /// <summary>
    /// Strips the HTML from string.
    /// </summary>
    /// <param name="htmlString">The HTML string.</param>
    /// <param name="regex">Pattern whose matches are removed from <paramref name="htmlString"/>.</param>
    /// <returns>
    /// The input with every match of <paramref name="regex"/> removed; a null or empty
    /// input is returned unchanged.
    /// </returns>
    public static string StripHtmlFromString(string htmlString, Regex regex)
    {
        if (string.IsNullOrEmpty(htmlString))
        {
            return htmlString;
        }

        return regex.Replace(htmlString, string.Empty);
    }

    /// <summary>
    /// Strips tags from <paramref name="descriptionText"/> and, when the remaining text is at
    /// least <paramref name="requiredLength"/> characters long, shortens it and appends
    /// <paramref name="ending"/>.
    /// </summary>
    /// <param name="descriptionText">Text that may contain markup.</param>
    /// <param name="requiredLength">Length threshold; the kept prefix is at most one character shorter.</param>
    /// <param name="regex">Pattern used to remove markup.</param>
    /// <param name="ending">Suffix appended to shortened text.</param>
    /// <returns>
    /// The stripped text unchanged when it is null, empty or shorter than
    /// <paramref name="requiredLength"/>; otherwise the shortened text followed by
    /// <paramref name="ending"/>.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// The text needs shortening and <paramref name="requiredLength"/> is less than 1.
    /// </exception>
    public static string DisplaySearchSummary(string descriptionText, int requiredLength, Regex regex, string ending = "...")
    {
        descriptionText = StripHtmlFromString(descriptionText, regex);

        if (string.IsNullOrEmpty(descriptionText) || descriptionText.Length < requiredLength)
        {
            return descriptionText;
        }

        if (requiredLength < 1)
        {
            // Substring below would be handed a negative length; fail with the parameter named.
            throw new ArgumentOutOfRangeException("requiredLength", requiredLength, "requiredLength must be at least 1.");
        }

        // Get the required length of the string
        var requiredText = descriptionText.Substring(0, requiredLength - 1);

        // Select the last occurrence of a whitespace character to break the string correctly.
        // LastIndexOf returns -1 when the prefix has no space; cut at the prefix in that case
        // instead of passing -1 to Substring.
        int lastSpace = requiredText.LastIndexOf(' ');
        var head = lastSpace >= 0 ? requiredText.Substring(0, lastSpace) : requiredText;

        return string.Concat(head, ending);
    }

    /// <summary>
    /// Walks <paramref name="html"/> one character at a time, copying characters to the output
    /// until the text counter reaches <paramref name="length"/>, while tracking opened tags on a
    /// stack so that closing tags met later in the input are still written.
    /// </summary>
    /// <param name="html">Markup to truncate.</param>
    /// <param name="length">Value the text counter is compared against.</param>
    /// <param name="addElipsis">When true, an ellipsis character is written at the truncation point.</param>
    /// <param name="treatTagsAsContent">When true, characters inside tags also advance the text counter.</param>
    /// <returns>The truncated markup with doubled spaces collapsed and surrounding whitespace trimmed.</returns>
    public static string Truncate(string html, int length, bool addElipsis, bool treatTagsAsContent)
    {
        using (var outputms = new MemoryStream())
        {
            using (var outputtw = new StreamWriter(outputms))
            {
                using (var ms = new MemoryStream())
                {
                    using (var tw = new StreamWriter(ms))
                    {
                        // Copy the input into a stream and rewind it so it can be read back char by char.
                        tw.Write(html);
                        tw.Flush();
                        ms.Position = 0;
                        var tagStack = new Stack<string>();
                        using (TextReader tr = new StreamReader(ms))
                        {
                            bool IsInsideElement = false;
                            bool lengthReached = false;
                            int ic;
                            int currentTextLength = 0;
                            string currentTag = string.Empty;
                            string tagContents = string.Empty;
                            bool insideTagSpaceEncountered = false;
                            bool isTagClose = false;
                            while ((ic = tr.Read()) != -1)
                            {
                                bool write = true;
                                if (ic == (int)'<')
                                {
                                    // Start of a tag: reset the per-tag state.
                                    if (!lengthReached)
                                    {
                                        IsInsideElement = true;
                                    }
                                    insideTagSpaceEncountered = false;
                                    currentTag = string.Empty;
                                    tagContents = string.Empty;
                                    isTagClose = false;
                                    if (tr.Peek() == (int)'/')
                                    {
                                        isTagClose = true;
                                    }
                                }
                                else if (ic == (int)'>')
                                {
                                    // End of a tag: the '>' itself advances the text counter.
                                    IsInsideElement = false;
                                    currentTextLength++;
                                    if (isTagClose && tagStack.Count > 0)
                                    {
                                        string thisTag = tagStack.Pop();
                                        outputtw.Write("</" + thisTag + ">");
                                    }
                                    if (!isTagClose && currentTag.Length > 0)
                                    {
                                        if (!lengthReached)
                                        {
                                            tagStack.Push(currentTag);
                                            outputtw.Write("<" + currentTag);
                                            if (tr.Peek() != (int)' ')
                                            {
                                                if (!string.IsNullOrEmpty(tagContents))
                                                {
                                                    if (tagContents.EndsWith("/"))
                                                    {
                                                        //short close
                                                        tagStack.Pop();
                                                    }
                                                    outputtw.Write(tagContents);
                                                }
                                                outputtw.Write(">");
                                            }
                                        }
                                    }
                                    continue;
                                }
                                else
                                {
                                    if (IsInsideElement)
                                    {
                                        if (ic == (int)' ')
                                        {
                                            if (!insideTagSpaceEncountered)
                                            {
                                                insideTagSpaceEncountered = true;
                                            }
                                        }
                                        // Characters before the first space form the tag name.
                                        if (!insideTagSpaceEncountered)
                                        {
                                            currentTag += (char)ic;
                                        }
                                    }
                                }
                                if (IsInsideElement || insideTagSpaceEncountered)
                                {
                                    write = false;
                                    if (insideTagSpaceEncountered)
                                    {
                                        tagContents += (char)ic;
                                    }
                                }
                                if (!IsInsideElement || treatTagsAsContent)
                                {
                                    currentTextLength++;
                                }
                                if (currentTextLength <= length || (lengthReached && IsInsideElement))
                                {
                                    if (write)
                                    {
                                        outputtw.Write((char)ic);
                                    }
                                }
                                if (!lengthReached && currentTextLength >= length)
                                {
                                    //reached truncate point
                                    if (addElipsis)
                                    {
                                        outputtw.Write("…");
                                    }
                                    lengthReached = true;
                                }
                            }
                        }
                    }
                }
                outputtw.Flush();
                outputms.Position = 0;
                using (TextReader outputtr = new StreamReader(outputms))
                {
                    // The pasted call replaced a single space with a single space, which has no
                    // effect; collapsing doubled spaces is the presumed intent.
                    return outputtr.ReadToEnd().Replace("  ", " ").Trim();
                }
            }
        }
    }
}
No comments:
Post a Comment