Comparison of removing HTML tags with a regex versus using a memory stream to achieve the same result.
The compiled regex is faster.
/// <summary>
/// Console benchmark that times <c>Do.DisplaySearchSummary</c> (regex based) against
/// <c>Do.Truncate</c> (memory-stream based) on the same HTML fragment and prints the elapsed
/// stopwatch ticks for each.
/// </summary>
class Program
{
    /// <summary>
    /// Runs both candidates <c>repeats</c> times each, prints their timings and waits for a key press.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        int repeats = 1;
        Regex regex = new Regex(@"<(.|\n)*?>", RegexOptions.Compiled);

        // The fixture is a verbatim string: its lines must stay at column 0 so the content is unchanged.
        var htmlStr = @"
<div id='lipsum'>
<p>
</p><ul>
<li>Lorem ipsum dolor sit amet, consectetur adipiscing elit.</li>
<li>Vestibulum tincidunt eros ac velit scelerisque pharetra.</li>
</ul>
<p></p>
<p>
</p><ul>
<li>Aenean pellentesque mauris et massa eleifend tristique.</li>
<li>In rutrum magna at arcu molestie porta.</li>
<li>Pellentesque rutrum nibh non est auctor varius.</li>
</ul>
<p></p>
<p>
</p><ul>
<li>Vestibulum elementum ante blandit risus cursus convallis.</li>
<li>In id magna lacinia, luctus sapien in, tempor elit.</li>
</ul>
<p></p>
<p>
</p><ul>
<li>Integer tempus tellus nec purus ultrices, quis facilisis magna adipiscing.</li>
<li>Cras convallis sapien vel augue ultrices pulvinar.</li>
<li>Sed pretium eros vel tellus feugiat, ut congue erat pellentesque.</li>
<li>Vestibulum convallis tortor congue sapien condimentum, sit amet vestibulum nisl tristique.</li>
<li>Cras non lacus sagittis, auctor massa eget, pulvinar elit.</li>
<li>Morbi eu augue tincidunt, luctus lacus vestibulum, varius elit.</li>
</ul>
<p></p>
<p>
</p><ul>
<li>Cras at dui sed justo convallis mattis.</li>
<li>Curabitur molestie mi nec dui interdum aliquet.</li>
<li>Maecenas malesuada magna non aliquam sollicitudin.</li>
<li>Maecenas porta erat quis turpis dictum faucibus.</li>
<li>Duis dignissim elit a ultrices tristique.</li>
</ul>
<p></p></div>";

        Measure("Inhouse", "Inhouse: ", repeats, () => Do.DisplaySearchSummary(htmlStr, 150, regex));
        Measure("umbraco", "Umbraco: ", repeats, () => Do.Truncate(htmlStr, 150, true, false));

        // Keep the console window open until the user presses a key (once).
        Console.ReadKey();
    }

    /// <summary>
    /// Prints <paramref name="banner"/>, times <paramref name="repeats"/> invocations of
    /// <paramref name="work"/> and prints <paramref name="resultPrefix"/> followed by the elapsed ticks.
    /// </summary>
    /// <param name="banner">Line written before the measurement starts.</param>
    /// <param name="resultPrefix">Text written in front of the elapsed tick count.</param>
    /// <param name="repeats">Number of timed invocations.</param>
    /// <param name="work">The operation being measured.</param>
    private static void Measure(string banner, string resultPrefix, int repeats, Action work)
    {
        Console.WriteLine(banner);

        // One untimed call first, so one-off first-call costs (JIT, lazy initialisation)
        // do not dominate the numbers when repeats is small.
        work();

        var timer = Stopwatch.StartNew();
        for (int i = 0; i < repeats; i++)
        {
            work();
        }
        timer.Stop();

        Console.WriteLine(resultPrefix + timer.ElapsedTicks);
    }
}
/// <summary>
/// Helpers that turn an HTML fragment into a short summary: a regex-based tag stripper with
/// word-boundary truncation, and a character-by-character truncator built on memory streams.
/// </summary>
public class Do
{
    /// <summary>
    /// Strips the HTML from string.
    /// </summary>
    /// <param name="htmlString">The HTML string.</param>
    /// <param name="regex">Pattern whose matches are removed from <paramref name="htmlString"/>.</param>
    /// <returns>
    /// String without html tags; a null or empty input is returned unchanged.
    /// </returns>
    public static string StripHtmlFromString(string htmlString, Regex regex)
    {
        if (string.IsNullOrEmpty(htmlString))
        {
            return htmlString;
        }

        return regex.Replace(htmlString, string.Empty);
    }

    /// <summary>
    /// Strips tags from <paramref name="descriptionText"/> and, when the remaining text is at least
    /// <paramref name="requiredLength"/> characters long, shortens it and appends <paramref name="ending"/>.
    /// </summary>
    /// <param name="descriptionText">HTML text to summarise.</param>
    /// <param name="requiredLength">Length threshold; the kept text is at most one character shorter than this.</param>
    /// <param name="regex">Pattern used to remove the tags.</param>
    /// <param name="ending">Suffix appended to a shortened summary.</param>
    /// <returns>
    /// The stripped text unchanged when it is null, empty or shorter than <paramref name="requiredLength"/>;
    /// otherwise the text cut back to the last space before the limit (or cut at the limit when there is
    /// no space) followed by <paramref name="ending"/>.
    /// </returns>
    public static string DisplaySearchSummary(string descriptionText, int requiredLength, Regex regex, string ending = "...")
    {
        descriptionText = StripHtmlFromString(descriptionText, regex);
        if (string.IsNullOrEmpty(descriptionText) || descriptionText.Length < requiredLength)
        {
            return descriptionText;
        }

        // Keep one character less than the limit. Clamp at zero so a non-positive limit
        // yields an empty head instead of a negative Substring length.
        string head = descriptionText.Substring(0, Math.Max(requiredLength - 1, 0));

        // Break on the last space so a word is not cut in half. LastIndexOf returns -1 when the
        // head holds no space (e.g. one long word); in that case keep the hard cut rather than
        // passing -1 to Substring, which would throw.
        int lastSpace = head.LastIndexOf(' ');
        if (lastSpace >= 0)
        {
            head = head.Substring(0, lastSpace);
        }

        return string.Concat(head, ending);
    }

    /// <summary>
    /// Reads <paramref name="html"/> one character at a time and copies tags and text to an output
    /// buffer until the running character count reaches <paramref name="length"/>. Closing tags met
    /// after that point are still written while the open-tag stack is non-empty.
    /// </summary>
    /// <param name="html">The markup to truncate.</param>
    /// <param name="length">
    /// Limit for the running count. The count grows by one for every character read outside a tag
    /// and for every '&gt;'.
    /// </param>
    /// <param name="addElipsis">When true, "…" is written once, at the moment the limit is reached.</param>
    /// <param name="treatTagsAsContent">When true, characters inside tags also count towards <paramref name="length"/>.</param>
    /// <returns>The collected output after the final <c>Replace</c> and <c>Trim</c>.</returns>
    public static string Truncate(string html, int length, bool addElipsis, bool treatTagsAsContent)
    {
        using (var outputms = new MemoryStream())
        {
            using (var outputtw = new StreamWriter(outputms))
            {
                using (var ms = new MemoryStream())
                {
                    using (var tw = new StreamWriter(ms))
                    {
                        // Copy the input into a stream so it can be read back character by character.
                        tw.Write(html);
                        tw.Flush();
                        ms.Position = 0;

                        // Names of the elements that have been opened in the output and not yet closed.
                        var tagStack = new Stack<string>();
                        using (TextReader tr = new StreamReader(ms))
                        {
                            bool IsInsideElement = false;
                            bool lengthReached = false;
                            int ic = 0;
                            // NOTE(review): currentLength is incremented below but never read.
                            int currentLength = 0, currentTextLength = 0;
                            string currentTag = string.Empty;
                            string tagContents = string.Empty;
                            bool insideTagSpaceEncountered = false;
                            bool isTagClose = false;
                            while ((ic = tr.Read()) != -1)
                            {
                                bool write = true;
                                if (ic == (int)'<')
                                {
                                    // Once the limit is hit, new tags are no longer tracked as elements.
                                    if (!lengthReached)
                                    {
                                        IsInsideElement = true;
                                    }
                                    insideTagSpaceEncountered = false;
                                    currentTag = string.Empty;
                                    tagContents = string.Empty;
                                    isTagClose = false;
                                    if (tr.Peek() == (int)'/')
                                    {
                                        isTagClose = true;
                                    }
                                }
                                else if (ic == (int)'>')
                                {
                                    IsInsideElement = false;
                                    // The '>' itself is counted towards the limit.
                                    currentTextLength++;
                                    if (isTagClose && tagStack.Count > 0)
                                    {
                                        // The closing tag is rebuilt from the stack, not from the text that was read.
                                        string thisTag = tagStack.Pop();
                                        outputtw.Write("</" + thisTag + ">");
                                    }
                                    if (!isTagClose && currentTag.Length > 0)
                                    {
                                        if (!lengthReached)
                                        {
                                            tagStack.Push(currentTag);
                                            outputtw.Write("<" + currentTag);
                                            // NOTE(review): the peek looks at the character AFTER this '>'. When it is
                                            // a space, neither the attributes nor the '>' are written - confirm intended.
                                            if (tr.Peek() != (int)' ')
                                            {
                                                if (!string.IsNullOrEmpty(tagContents))
                                                {
                                                    if (tagContents.EndsWith("/"))
                                                    {
                                                        //short close
                                                        tagStack.Pop();
                                                    }
                                                    outputtw.Write(tagContents);
                                                }
                                                outputtw.Write(">");
                                            }
                                        }
                                    }
                                    // Skips the shared counting/writing logic below. insideTagSpaceEncountered
                                    // is not reset here; it is only cleared at the next '<'.
                                    continue;
                                }
                                else
                                {
                                    if (IsInsideElement)
                                    {
                                        if (ic == (int)' ')
                                        {
                                            if (!insideTagSpaceEncountered)
                                            {
                                                insideTagSpaceEncountered = true;
                                            }
                                        }
                                        // Everything up to the first space is the tag name.
                                        if (!insideTagSpaceEncountered)
                                        {
                                            currentTag += (char)ic;
                                        }
                                    }
                                }
                                if (IsInsideElement || insideTagSpaceEncountered)
                                {
                                    // Tag characters are buffered and emitted when the '>' arrives.
                                    write = false;
                                    if (insideTagSpaceEncountered)
                                    {
                                        tagContents += (char)ic;
                                    }
                                }
                                if (!IsInsideElement || treatTagsAsContent)
                                {
                                    currentTextLength++;
                                }
                                currentLength++;
                                if (currentTextLength <= length || (lengthReached && IsInsideElement))
                                {
                                    if (write)
                                    {
                                        outputtw.Write((char)ic);
                                    }
                                }
                                if (!lengthReached && currentTextLength >= length)
                                {
                                    //reached truncate point
                                    if (addElipsis)
                                    {
                                        outputtw.Write("…");
                                    }
                                    lengthReached = true;
                                }
                            }
                        }
                    }
                }
                outputtw.Flush();
                outputms.Position = 0;
                using (TextReader outputtr = new StreamReader(outputms))
                {
                    // NOTE(review): as the literals appear here this replaces a space with a space;
                    // presumably meant to collapse double spaces - confirm.
                    return outputtr.ReadToEnd().Replace(" ", " ").Trim();
                }
            }
        }
    }
}