using System;
using System.Text.RegularExpressions;
/*
* (c) Craig Dunn - ConceptDevelopment.NET
* 5-July-04
*
* To use:
*    string encoded = ExtendedHtmlUtility.HtmlEntityEncode ("test string with Unicode chars and & < >");
*    string decoded = ExtendedHtmlUtility.HtmlEntityDecode (encoded); // "test string with Unicode chars and & < >"
*/
/// <summary>
/// Encodes text to HTML entities (every character outside printable ASCII becomes
/// a decimal numeric entity) and decodes such entities back to characters.
/// </summary>
public class ExtendedHtmlUtility {

    /// <summary>
    /// Text substituted for any entity the decoder cannot resolve
    /// (unknown named entity, or a numeric value that is not a valid code point).
    /// </summary>
    private const string UnresolvedPlaceholder = "X";

    /// <summary>
    /// Static Regular Expression to match Html Entities in encoded text:
    /// decimal numeric entities (group 'unicode') or named entities (group 'html').
    /// </summary>
    private static readonly Regex entityResolver =
        new Regex (@"([&][#](?'unicode'\d+);)|([&](?'html'\w+);)");

    /// <summary>
    /// Encodes text so that it contains only printable ASCII characters.
    ///
    /// Based on the 'reflected' code (from the Framework System.Web.HttpServerUtility)
    /// listed on this page
    /// UrlEncode vs. HtmlEncode
    /// http://www.aspnetresources.com/blog/encoding_forms.aspx
    ///
    /// PDF of unicode characters in the 0-127 (dec) range
    /// http://www.unicode.org/charts/PDF/U0000.pdf
    /// </summary>
    /// <param name="unicodeText">Text to encode; must not be null.</param>
    /// <returns>
    /// The encoded text: <c>&amp;</c> becomes <c>&amp;amp;</c>, <c>&lt;</c> becomes
    /// <c>&amp;lt;</c> and <c>&gt;</c> becomes <c>&amp;gt;</c>. Every other character from
    /// space to tilde (hex 20-7E, dec 32-126) is left unchanged - note that this
    /// includes the quote characters. Each remaining UTF-16 code unit is written as a
    /// decimal numeric entity, so a character outside the BMP produces two entities
    /// (one per surrogate).
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="unicodeText"/> is null.</exception>
    public static string HtmlEntityEncode (string unicodeText) {
        if (unicodeText == null) {
            throw new ArgumentNullException ("unicodeText");
        }

        // StringBuilder keeps the loop linear; repeated string += is quadratic.
        System.Text.StringBuilder encoded = new System.Text.StringBuilder (unicodeText.Length);
        foreach (char c in unicodeText) {
            switch (c) {
                case '&':
                    encoded.Append ("&amp;");
                    break;
                case '<':
                    encoded.Append ("&lt;");
                    break;
                case '>':
                    encoded.Append ("&gt;");
                    break;
                default:
                    if ((c >= ' ') && (c <= '~')) {
                        // printable ASCII - leave as-is
                        encoded.Append (c);
                    } else {
                        // control characters and everything above '~' - numeric entity
                        encoded.Append ("&#");
                        encoded.Append (((int) c).ToString (System.Globalization.NumberFormatInfo.InvariantInfo));
                        encoded.Append (';');
                    }
                    break;
            }
        }
        return encoded.ToString ();
    } // HtmlEntityEncode


    /// <summary>
    /// Converts Html Entities back to their 'underlying' Unicode characters
    /// </summary>
    /// <param name="encodedText">Text containing entities; must not be null.</param>
    /// <returns>
    /// The text with every matched entity replaced: <c>&amp;amp;</c> becomes <c>&amp;</c>,
    /// <c>&amp;lt;</c> becomes <c>&lt;</c>, numeric entities become the character they name.
    /// Entities that cannot be resolved become <c>X</c>. Text that is not an entity
    /// remains unchanged.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="encodedText"/> is null.</exception>
    public static string HtmlEntityDecode (string encodedText) {
        if (encodedText == null) {
            throw new ArgumentNullException ("encodedText");
        }
        return entityResolver.Replace (encodedText, new MatchEvaluator (ResolveEntity));
    } // HtmlEntityDecode


    /// <summary>
    /// Regex Match processing delegate to replace the Entities with their
    /// underlying Unicode character.
    ///
    /// List of entities
    /// http://www.vigay.com/inet/acorn/browse-html2.html#entities
    /// </summary>
    /// <param name="matchToProcess">A match produced by the entity regular expression.</param>
    /// <returns>
    /// The replacement text for the entity, or <c>X</c> when it cannot be resolved.
    /// </returns>
    private static string ResolveEntity (System.Text.RegularExpressions.Match matchToProcess) {
        Group numeric = matchToProcess.Groups["unicode"];
        if (numeric.Success) {
            return ResolveNumericEntity (numeric.Value);
        }

        Group named = matchToProcess.Groups["html"];
        if (named.Success) {
            // Invariant lower-casing so the lookup does not depend on the current culture.
            switch (named.Value.ToLowerInvariant ()) {
                // this could be expanded to as many as you like, see
                // http://www.vigay.com/inet/acorn/browse-html2.html#entities
                // NOTE(review): nbsp resolves to a plain space rather than U+00A0;
                // kept as-is because callers may rely on it - confirm intent.
                case "nbsp": return " ";
                case "copy": return "\u00A9";
                case "lt"  : return "<";
                case "gt"  : return ">";
                case "amp" : return "&";
            }
        }

        // anything not listed above resolves to the placeholder
        return UnresolvedPlaceholder;
    } // ResolveEntity()


    /// <summary>
    /// Converts the decimal digits of a numeric entity to the text they represent.
    /// </summary>
    /// <param name="digits">The digits captured between <c>&amp;#</c> and <c>;</c>.</param>
    /// <returns>
    /// A single UTF-16 code unit for values up to 65535 (lone surrogates are passed
    /// through so that the output of HtmlEntityEncode round-trips), a surrogate pair
    /// for values up to 0x10FFFF, otherwise <c>X</c>.
    /// </returns>
    private static string ResolveNumericEntity (string digits) {
        int codePoint;
        bool parsed = int.TryParse (digits,
            System.Globalization.NumberStyles.None,
            System.Globalization.NumberFormatInfo.InvariantInfo,
            out codePoint);
        if (!parsed) {
            // too many digits for an int, or digits that are not ASCII 0-9
            return UnresolvedPlaceholder;
        }
        if (codePoint <= char.MaxValue) {
            return ((char) codePoint).ToString ();
        }
        if (codePoint <= 0x10FFFF) {
            return char.ConvertFromUtf32 (codePoint);
        }
        return UnresolvedPlaceholder;
    } // ResolveNumericEntity()
} // class ExtendedHtmlUtility