From 5a9b04b099d81344677cfd1651cd5ba94326069e Mon Sep 17 00:00:00 2001 From: jnyrup Date: Tue, 19 Dec 2017 13:07:55 +0100 Subject: [PATCH] string optimizations --- src/HtmlAgilityPack.Shared/HtmlAttribute.cs | 2 +- src/HtmlAgilityPack.Shared/HtmlDocument.cs | 9 +++-- src/HtmlAgilityPack.Shared/HtmlEntity.cs | 25 ++++++++---- src/HtmlAgilityPack.Shared/HtmlNode.cs | 6 +-- .../HtmlNodeNavigator.cs | 28 ++++++------- src/HtmlAgilityPack.Shared/HtmlWeb.cs | 40 +++++++++---------- .../MixedCodeDocument.cs | 4 +- 7 files changed, 63 insertions(+), 51 deletions(-) diff --git a/src/HtmlAgilityPack.Shared/HtmlAttribute.cs b/src/HtmlAgilityPack.Shared/HtmlAttribute.cs index 384188e9..82a511a0 100644 --- a/src/HtmlAgilityPack.Shared/HtmlAttribute.cs +++ b/src/HtmlAgilityPack.Shared/HtmlAttribute.cs @@ -261,7 +261,7 @@ private string GetRelativeXpath() i++; } - return "@" + Name + "[" + i + "]"; + return "@" + Name + "[" + i.ToString() + "]"; } #endregion diff --git a/src/HtmlAgilityPack.Shared/HtmlDocument.cs b/src/HtmlAgilityPack.Shared/HtmlDocument.cs index 19d3759a..28a30255 100644 --- a/src/HtmlAgilityPack.Shared/HtmlDocument.cs +++ b/src/HtmlAgilityPack.Shared/HtmlDocument.cs @@ -1604,8 +1604,9 @@ private void Parse() // check buffer end if ((_currentnode._namelength + 3) <= (Text.Length - (_index - 1))) { - if (string.Compare(Text.Substring(_index - 1, _currentnode._namelength + 2), - "') || (IsWhiteSpace(c))) @@ -1850,7 +1851,7 @@ private bool PushNodeEnd(int index, bool close) if ((close) || (!_currentnode._starttag)) { if ((OptionStopperNodeName != null) && (_remainder == null) && - (string.Compare(_currentnode.Name, OptionStopperNodeName, StringComparison.OrdinalIgnoreCase) == 0)) + (string.Equals(_currentnode.Name, OptionStopperNodeName, StringComparison.OrdinalIgnoreCase))) { _remainderOffset = index; _remainder = Text.Substring(_remainderOffset); @@ -1904,7 +1905,7 @@ private void ReadDocumentEncoding(HtmlNode node) HtmlAttribute att = node.Attributes["http-equiv"]; if (att == null) return; - if (string.Compare(att.Value, "content-type", StringComparison.OrdinalIgnoreCase) != 0) + if (!string.Equals(att.Value, "content-type", StringComparison.OrdinalIgnoreCase)) return; HtmlAttribute content = node.Attributes["content"]; if (content != null) diff --git a/src/HtmlAgilityPack.Shared/HtmlEntity.cs b/src/HtmlAgilityPack.Shared/HtmlEntity.cs index f1abc029..56ec63e8 100644 --- a/src/HtmlAgilityPack.Shared/HtmlEntity.cs +++ b/src/HtmlAgilityPack.Shared/HtmlEntity.cs @@ -646,7 +646,9 @@ public static string DeEntitize(string text) } catch { - sb.Append("&#" + e + ";"); + sb.Append("&#") + .Append(e) + .Append(';'); } } else @@ -656,7 +658,9 @@ public static string DeEntitize(string text) if (!_entityValue.TryGetValue(entity.ToString(), out code)) { // nope - sb.Append("&" + entity + ";"); + sb.Append('&') + .Append(entity) + .Append(';'); } else { @@ -671,7 +675,8 @@ public static string DeEntitize(string text) case '&': // new entity start without end, it was not an entity... - sb.Append("&" + entity); + sb.Append('&') + .Append(entity); entity.Remove(0, entity.Length); break; @@ -681,7 +686,8 @@ public static string DeEntitize(string text) { // unknown stuff, just don't touch it state = ParseState.Text; - sb.Append("&" + entity); + sb.Append('&') + .Append(entity); entity.Remove(0, entity.Length); } break; @@ -693,7 +699,8 @@ public static string DeEntitize(string text) // finish the work if (state == ParseState.EntityStart) { - sb.Append("&" + entity); + sb.Append('&') + .Append(entity); } return sb.ToString(); } @@ -784,11 +791,15 @@ public static string Entitize(string text, bool useNames, bool entitizeQuotAmpAn if ((entity == null) || (!useNames)) { - sb.Append("&#" + code + ";"); + sb.Append("&#") + .Append(code) + .Append(';'); } else { - sb.Append("&" + entity + ";"); + sb.Append('&') + .Append(entity) + .Append(';'); } } else diff --git a/src/HtmlAgilityPack.Shared/HtmlNode.cs b/src/HtmlAgilityPack.Shared/HtmlNode.cs index 06b96c56..b3db52e0 100644 --- a/src/HtmlAgilityPack.Shared/HtmlNode.cs +++ b/src/HtmlAgilityPack.Shared/HtmlNode.cs @@ -1548,7 +1548,7 @@ public void WriteTo(TextWriter outText, int level=0) if (_ownerdocument.OptionOutputAsXml) { var commentNode = (HtmlCommentNode)this; - if (!_ownerdocument.BackwardCompatibility && commentNode.Comment.ToLowerInvariant().StartsWith("" + (_currentnode.Attributes.Count > 0)); + InternalTrace(">" + (_currentnode.Attributes.Count > 0).ToString()); return (_currentnode.Attributes.Count > 0); } } @@ -247,7 +247,7 @@ public override bool HasChildren { get { - InternalTrace(">" + (_currentnode.ChildNodes.Count > 0)); + InternalTrace(">" + (_currentnode.ChildNodes.Count > 0).ToString()); return (_currentnode.ChildNodes.Count > 0); } } @@ -259,7 +259,7 @@ public override bool IsEmptyElement { get { - InternalTrace(">" + !HasChildren); + InternalTrace(">" + (!HasChildren).ToString()); // REVIEW: is this ok? return !HasChildren; } @@ -329,31 +329,31 @@ public override XPathNodeType NodeType switch (_currentnode.NodeType) { case HtmlNodeType.Comment: - InternalTrace(">" + XPathNodeType.Comment); + InternalTrace(">" + XPathNodeType.Comment.ToString()); return XPathNodeType.Comment; case HtmlNodeType.Document: - InternalTrace(">" + XPathNodeType.Root); + InternalTrace(">" + XPathNodeType.Root.ToString()); return XPathNodeType.Root; case HtmlNodeType.Text: - InternalTrace(">" + XPathNodeType.Text); + InternalTrace(">" + XPathNodeType.Text.ToString()); return XPathNodeType.Text; case HtmlNodeType.Element: { if (_attindex != -1) { - InternalTrace(">" + XPathNodeType.Attribute); + InternalTrace(">" + XPathNodeType.Attribute.ToString()); return XPathNodeType.Attribute; } - InternalTrace(">" + XPathNodeType.Element); + InternalTrace(">" + XPathNodeType.Element.ToString()); return XPathNodeType.Element; } default: throw new NotImplementedException("Internal error: Unhandled HtmlNodeType: " + - _currentnode.NodeType); + _currentnode.NodeType.ToString()); } } } @@ -378,7 +378,7 @@ public override string Value { get { - InternalTrace("nt=" + _currentnode.NodeType); + InternalTrace("nt=" + _currentnode.NodeType.ToString()); switch (_currentnode.NodeType) { case HtmlNodeType.Comment: @@ -405,7 +405,7 @@ public override string Value default: throw new NotImplementedException("Internal error: Unhandled HtmlNodeType: " + - _currentnode.NodeType); + _currentnode.NodeType.ToString()); } } } @@ -481,7 +481,7 @@ public override bool IsSamePosition(XPathNavigator other) InternalTrace(">false"); return false; } - InternalTrace(">" + (nav._currentnode == _currentnode)); + InternalTrace(">" + (nav._currentnode == _currentnode).ToString()); return (nav._currentnode == _currentnode); } @@ -498,9 +498,9 @@ public override bool MoveTo(XPathNavigator other) InternalTrace(">false (nav is not an HtmlNodeNavigator)"); return false; } - InternalTrace("moveto oid=" + nav.GetHashCode() + InternalTrace("moveto oid=" + nav.GetHashCode().ToString() + ", n:" + nav._currentnode.Name - + ", a:" + nav._attindex); + + ", a:" + nav._attindex.ToString()); if (nav._doc == _doc) { diff --git a/src/HtmlAgilityPack.Shared/HtmlWeb.cs b/src/HtmlAgilityPack.Shared/HtmlWeb.cs index 7edfba8f..ebb146dc 100644 --- a/src/HtmlAgilityPack.Shared/HtmlWeb.cs +++ b/src/HtmlAgilityPack.Shared/HtmlWeb.cs @@ -927,9 +927,7 @@ public static string GetContentTypeForExtension(string extension, string def) string contentType = ""; if (!helper.GetIsRegistryAvailable()) { - if (MimeTypes.ContainsKey(extension)) - contentType = MimeTypes[extension]; - else + if (!MimeTypes.TryGetValue(extension, out contentType)) contentType = def; } @@ -1648,9 +1646,6 @@ private HttpStatusCode Get(Uri uri, string method, string path, HtmlDocument doc _requestDuration = Environment.TickCount - tc; _responseUri = resp.ResponseUri; - bool html = IsHtmlContent(resp.ContentType); - bool isUnknown = string.IsNullOrEmpty(resp.ContentType); - Encoding respenc = !string.IsNullOrEmpty(resp.ContentEncoding) ? Encoding.GetEncoding(resp.ContentEncoding) : null; @@ -1718,6 +1713,10 @@ private HttpStatusCode Get(Uri uri, string method, string path, HtmlDocument doc else { // try to work in-memory + + bool html = IsHtmlContent(resp.ContentType); + bool isUnknown = string.IsNullOrEmpty(resp.ContentType); + if (doc != null && html) { if (respenc == null) @@ -1876,14 +1875,6 @@ private HttpStatusCode Get(Uri uri, string method, string path, HtmlDocument doc _requestDuration = Environment.TickCount - tc; _responseUri = response.RequestMessage.RequestUri; - bool isUnknown = response.Content.Headers.ContentType == null; - bool html = !isUnknown && IsHtmlContent(response.Content.Headers.ContentType.MediaType); - - var encoding = response.Content.Headers.ContentEncoding.FirstOrDefault(); - Encoding respenc = !string.IsNullOrEmpty(encoding) - ? Encoding.GetEncoding(encoding) - : null; - if(CaptureRedirect) { // Found == 302 @@ -1936,6 +1927,15 @@ private HttpStatusCode Get(Uri uri, string method, string path, HtmlDocument doc else { // try to work in-memory + + bool isUnknown = response.Content.Headers.ContentType == null; + bool html = !isUnknown && IsHtmlContent(response.Content.Headers.ContentType.MediaType); + + var encoding = response.Content.Headers.ContentEncoding.FirstOrDefault(); + Encoding respenc = !string.IsNullOrEmpty(encoding) + ? Encoding.GetEncoding(encoding) + : null; + if ((doc != null) && (html)) { if (respenc != null) @@ -2011,14 +2011,14 @@ private bool IsCacheHtmlContent(string path) } #endif - private bool IsHtmlContent(string contentType) + private static bool IsHtmlContent(string contentType) { - return contentType.ToLower().StartsWith("text/html"); + return contentType.StartsWith("text/html", StringComparison.OrdinalIgnoreCase); } - private bool IsGZipEncoding(string contentEncoding) + private static bool IsGZipEncoding(string contentEncoding) { - return contentEncoding.ToLower().StartsWith("gzip"); + return contentEncoding.StartsWith("gzip", StringComparison.OrdinalIgnoreCase); } #if !NETSTANDARD @@ -2062,7 +2062,7 @@ private void SaveCacheHeaders(Uri requestUri, HttpWebResponse resp) XmlNode cache = doc.FirstChild; foreach (string header in resp.Headers) { - XmlNode entry = doc.CreateElement("h"); + XmlElement entry = doc.CreateElement("h"); XmlAttribute att = doc.CreateAttribute("n"); att.Value = header; entry.Attributes.Append(att); @@ -2087,7 +2087,7 @@ private void SaveCacheHeaders(Uri requestUri, HttpResponseMessage resp) XmlNode cache = doc.FirstChild; foreach (var header in resp.Headers) { - XmlNode entry = doc.CreateElement("h"); + XmlElement entry = doc.CreateElement("h"); XmlAttribute att = doc.CreateAttribute("n"); att.Value = header.Key; entry.Attributes.Append(att); diff --git a/src/HtmlAgilityPack.Shared/MixedCodeDocument.cs b/src/HtmlAgilityPack.Shared/MixedCodeDocument.cs index e449e951..1765b649 100644 --- a/src/HtmlAgilityPack.Shared/MixedCodeDocument.cs +++ b/src/HtmlAgilityPack.Shared/MixedCodeDocument.cs @@ -435,7 +435,7 @@ private void Parse() case ParseState.Text: if (_index + TokenCodeStart.Length < _text.Length) { - if (_text.Substring(_index - 1, TokenCodeStart.Length) == TokenCodeStart) + if (string.CompareOrdinal(_text, _index - 1, TokenCodeStart, 0, TokenCodeStart.Length) == 0) { _state = ParseState.Code; _currentfragment.Length = _index - 1 - _currentfragment.Index; @@ -449,7 +449,7 @@ private void Parse() case ParseState.Code: if (_index + TokenCodeEnd.Length < _text.Length) { - if (_text.Substring(_index - 1, TokenCodeEnd.Length) == TokenCodeEnd) + if (string.CompareOrdinal(_text, _index - 1, TokenCodeEnd, 0, TokenCodeEnd.Length) == 0) { _state = ParseState.Text; _currentfragment.Length = _index + TokenCodeEnd.Length - _currentfragment.Index;