The requirements are the following:
- We have thousands of rich text components that contain URL references to external pages
- These references need to be replaced with internal Sitecore item references, matched via a field on the target pages
I ended up with the following script:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Rewrites legacy external URLs found in rich-text fields into internal Sitecore
# item links (~/link.aspx?_id=...).
#
# For every language in $languages:
#   1. walk all items below $oldUrlItems and read their legacy URL ($oldUrlField),
#   2. query the rich-text components whose $rteField contains that URL's relative path,
#   3. replace every matching href="..." attribute with a link to the page item.
#
# Relies on Sitecore PowerShell Extensions (Get-ChildItem/Get-Item with -Language
# and -Query, Write-Log) being available in the session.

# Name of the page field that holds the legacy (absolute) URL.
$oldUrlField = "Old URL"
# Root below which page items are scanned.
$oldUrlItems = "/sitecore/content/Home"
# Name of the rich-text field that is both searched and rewritten.
$rteField = "Text"
# Query template; {0} is replaced by the relative path that is searched for.
# NOTE(review): Sitecore query may require item names containing '-' to be wrapped
# in '#...#' -- confirm this path resolves in the target instance.
$rteQuery = "/sitecore/content/rich-text-components//*[(contains(@$rteField, '{0}'))]"
$languages = "en","hu-HU"

# source: https://docs.microsoft.com/en-us/dotnet/standard/base-types/regular-expression-example-scanning-for-hrefs
$hrefPattern = "href\s*=\s*(?:[\`"'](?<1>[^\`"']*)[\`"']|(?<1>\S+))"
# Options must go to the constructor: the instance overload Match(string, int)
# would interpret a RegexOptions value as a start offset, not as a flag.
# The regex is loop-invariant, so it is built once.
$hrefRegex = [regex]::new($hrefPattern, [Text.RegularExpressions.RegexOptions]::IgnoreCase)

foreach ($language in $languages) {
    Write-Host "Start fixing references for language: $language"
    Write-Log "Start fixing references for language: $language"

    # get page items
    $pageItems = Get-ChildItem -Recurse -Path $oldUrlItems -Language $language
    foreach ($pageItem in $pageItems) {
        if ($pageItem.Version -eq 0) {
            continue
        }

        # get old url
        $oldUrl = $pageItem[$oldUrlField]
        if ([string]::IsNullOrWhiteSpace($oldUrl)) {
            continue
        }

        # Parse defensively: a relative or malformed value must skip this page
        # instead of raising an error and leaving a stale URI from a previous page.
        $oldUri = $null
        if (-not [System.Uri]::TryCreate($oldUrl, [System.UriKind]::Absolute, [ref]$oldUri)) {
            $log = "Skip page " + $pageItem.ID + ": '" + $oldUrl + "' is not an absolute URL"
            Write-Host $log
            Write-Log $log
            continue
        }

        # search for relative path
        $searchFor = $oldUri.PathAndQuery.TrimEnd("/")
        if ($searchFor -eq "") {
            # A root URL would produce an empty search term that matches every
            # href in every component; never rewrite on that basis.
            $log = "Skip page " + $pageItem.ID + ": old URL '" + $oldUrl + "' has no path to search for"
            Write-Host $log
            Write-Log $log
            continue
        }
        $builtQuery = [string]::Format($rteQuery, $searchFor)
        # Kept only so the log line has the same shape as before.
        $likeCondition = "*" + $searchFor + "*"
        $replaceWith = "href=`"~/link.aspx?_id=" + $pageItem.ID.Guid.ToString("N").ToUpper() + "&_z=z`""

        # get cross references
        $foundCrossReferences = Get-Item -Path "master:" -Query $builtQuery -Language $language
        foreach ($contentItem in $foundCrossReferences) {
            # Read from the same field that the query matched and that is written back.
            $contentField = $contentItem[$rteField]
            if ([string]::IsNullOrEmpty($contentField)) {
                continue
            }

            # Accumulate all replacements so that several different matching
            # hrefs in one item are all rewritten, then save once.
            $updatedContent = $contentField

            # searching for href="*" text
            $m = $hrefRegex.Match($contentField)
            while ($m.Success) {
                # Whole attribute, e.g. href="http://www.old-domain.com/old-url/";
                # $m.Groups[1] would be only the URL part.
                $foundHref = $m.Groups[0].Value

                # Literal, case-insensitive substring test; avoids wildcard
                # interpretation of characters such as '?' or '[' in the URL.
                # NOTE(review): this is a "contains" match, so /old-url also
                # matches /old-url-2 -- same as the previous behavior.
                if ($foundHref.IndexOf($searchFor, [System.StringComparison]::OrdinalIgnoreCase) -ge 0) {
                    $log = "Fix URL: " + $pageItem.ID + " " + $contentItem.ID + " " + $likeCondition + " " + $foundHref
                    Write-Host $log
                    Write-Log $log
                    $updatedContent = $updatedContent.Replace($foundHref, $replaceWith)
                }
                $m = $m.NextMatch()
            }

            # Only open an edit transaction when something actually changed.
            if ($updatedContent -cne $contentField) {
                $contentItem.Editing.BeginEdit()
                $contentItem[$rteField] = $updatedContent
                $contentItem.Editing.EndEdit() | Out-Null
            }
        }
    }
}