Here, FWIW, are two handlers ( htmlFromRTF and rtfFromHTML ) which aspire to:
- Capture the insights shared with me in these two threads, and
- serve as reusable building blocks next time they are needed.
( Incidentally, both return an AppleScript version of an Either type (used here much like an Option type) – a record with a Left field (to hold any error message) and a Right field (to hold any value successfully obtained). )
One hint that I may still not be lining the ASObjC types up correctly, however, is that htmlFromRTF still crashes Script Editor if I pass it an ill-formed RTF string (one lacking the leading and trailing curly braces).
This doesn’t seem to happen either with:
- rtfFromHTML, or
- two JS for Automation equivalents (further below).
So not quite sure whether this expresses an oversight on my part, or a fragility of some kind elsewhere. Probably prudent to assume the former until proved innocent.
AppleScript versions
use AppleScript version "2.4"
use framework "Foundation"
use framework "AppKit"
use scripting additions
-- htmlFromRTF :: [String] -> String -> Either String String
--
-- Converts RTF source text to HTML source text by way of NSAttributedString.
--   exceptTags : a (possibly empty) list of any HTML tags that are not needed, e.g.
--                {"body", "doctype", "font", "head", "html", "p", "span", "style", "xml"}
--   strRTF     : the RTF source, as text
-- Returns an Either record: |Left| carries an error message on failure,
-- |Right| carries the HTML text on success.
on htmlFromRTF(exceptTags, strRTF)
    set ca to current application
    set s to ca's NSString's stringWithString:(strRTF)
    
    -- An RTF document opens with the group "{\rtf". Rejecting anything else
    -- up front keeps brace-less text (the input reported to crash Script
    -- Editor) away from the Cocoa RTF parser altogether.
    set trimmed to s's stringByTrimmingCharactersInSet:(ca's NSCharacterSet's whitespaceAndNewlineCharacterSet())
    if not ((trimmed's hasPrefix:"{\\rtf") as boolean) then return |Left|("String not recognised as RTF")
    
    set d to s's dataUsingEncoding:(ca's NSUTF8StringEncoding)
    set attStr to ca's NSAttributedString's alloc()'s initWithRTF:d documentAttributes:(missing value)
    if attStr is missing value then return |Left|("String not recognised as RTF")
    
    set {htmlData, err} to attStr's ¬
        dataFromRange:{location:0, |length|:attStr's |length|()} ¬
            documentAttributes:{DocumentType:"NSHTML", ExcludedElements:exceptTags} ¬
            |error|:(reference)
    
    -- Same failure conditions as before (an error object, or no data), but
    -- the error object is only dereferenced when one was actually supplied.
    if err is not missing value then return |Left|(err's localizedDescription() as text)
    if htmlData is missing value then return |Left|("Attributed string could not be converted to HTML")
    
    -- initWithData:encoding: yields missing value when the bytes are not
    -- valid UTF-8; coercing that to text would raise an AppleScript error.
    set strHTML to ca's NSString's alloc()'s initWithData:htmlData encoding:(ca's NSUTF8StringEncoding)
    if strHTML is missing value then return |Left|("HTML data could not be decoded as UTF-8")
    |Right|(strHTML as text)
end htmlFromRTF
-- rtfFromHTML :: String -> Either String String
--
-- Converts HTML source text to RTF source text by way of NSAttributedString.
--   strHTML : the HTML source, as text
-- Returns an Either record: |Left| carries an error message on failure,
-- |Right| carries the RTF text on success.
on rtfFromHTML(strHTML)
    set ca to current application
    set s to ca's NSString's stringWithString:strHTML
    set d to s's dataUsingEncoding:(ca's NSUTF8StringEncoding)
    
    -- The bytes handed over are always UTF-8 (see the line above), so say so:
    -- without the CharacterEncoding option the HTML importer has to guess,
    -- and non-ASCII text comes back garbled when the markup carries no
    -- charset declaration of its own.
    set attStr to ca's NSAttributedString's alloc()'s ¬
        initWithHTML:d ¬
            options:{CharacterEncoding:(ca's NSUTF8StringEncoding)} ¬
            documentAttributes:(missing value)
    if attStr is missing value then return |Left|("String could not be parsed as HTML")
    
    set {rtfData, err} to attStr's ¬
        dataFromRange:{location:0, |length|:attStr's |length|()} ¬
            documentAttributes:{DocumentType:"NSRTF"} ¬
            |error|:(reference)
    
    -- Same failure conditions as before (an error object, or no data), but
    -- the error object is only dereferenced when one was actually supplied.
    if err is not missing value then return |Left|(err's localizedDescription() as text)
    if rtfData is missing value then return |Left|("Attributed string could not be converted to RTF")
    
    -- initWithData:encoding: yields missing value when the bytes are not
    -- valid UTF-8; coercing that to text would raise an AppleScript error.
    set strRTF to ca's NSString's alloc()'s initWithData:rtfData encoding:(ca's NSUTF8StringEncoding)
    if strRTF is missing value then return |Left|("RTF data could not be decoded as UTF-8")
    |Right|(strRTF as text)
end rtfFromHTML
-- TEST ------------------------------------------------------------------
-- Round trip: a sample RTF document is converted to HTML (minus the listed
-- tags) and, if that succeeded, the HTML is converted back to RTF.
on run
    set sampleRTF to "{\\rtf1\\ansi\\ansicpg1252\\cocoartf1504\\cocoasubrtf830\n{\\fonttbl\\f0\\fswiss\\fcharset0 Helvetica;}\n{\\colortbl;\\red255\\green255\\blue255;}\n{\\*\\expandedcolortbl;;}\n\\pard\\tx560\\tx1120\\tx1680\\tx2240\\tx2800\\tx3360\\tx3920\\tx4480\\tx5040\\tx5600\\tx6160\\tx6720\\qc\\partightenfactor0\n\n\\f0\\fs32 \\cf0 \\expnd0\\expndtw0\\kerning0\nGamma}"
    -- OR:
    --set sampleRTF to "spem <in/> Allium" -- to test ill-formed RTF (Seems to crash in Sierra AS without braces)
    
    set unwantedTags to {"body", "doctype", "font", "head", "html", "p", "span", "style", "xml"}
    
    -- bindLR applies rtfFromHTML to any Right value, skipping any Left (failure) value
    return bindLR(htmlFromRTF(unwantedTags, sampleRTF), rtfFromHTML)
end run
-- GENERIC ------------------------------------------------------------------
-- Left :: a -> Either a b
-- Wraps x as the failure case of an Either record: the value goes in the
-- |Left| field and the |Right| field is left empty.
on |Left|(x)
    return {type:"Either", |Left|:x, |Right|:missing value}
end |Left|
-- Right :: b -> Either a b
-- Wraps x as the success case of an Either record: the value goes in the
-- |Right| field and the |Left| field is left empty.
on |Right|(x)
    return {type:"Either", |Left|:missing value, |Right|:x}
end |Right|
-- bindLR (>>=) :: Either a -> (a -> Either b) -> Either b
-- Chains a computation onto an Either: a failure (non-Right) value is passed
-- through untouched; otherwise mf is applied to the wrapped |Right| value.
on bindLR(m, mf)
    if not isRight(m) then return m
    set fn to mReturn(mf)
    return fn's |λ|(|Right| of m)
end bindLR
-- className :: NSObject -> String
-- Gives the Objective-C class name of the supplied object as AppleScript text.
on className(anyClass)
    set objcClass to anyClass's |class|()
    set nsName to current application's NSStringFromClass(objcClass)
    return nsName as text
end className
-- isRight :: Either a b -> Bool
-- True when x is tagged as an Either and its Left slot is empty.
-- The record is inspected through an NSDictionary copy of itself.
on isRight(x)
    set eitherDict to current application's ¬
        NSDictionary's dictionaryWithDictionary:x
    -- Check the tag first; the Left slot is only examined for Either records.
    set tagName to (eitherDict's objectForKey:"type") as text
    if tagName is not "Either" then return false
    set leftSlot to (eitherDict's objectForKey:"Left") as list
    return leftSlot = {missing value}
end isRight
-- Lift 2nd class handler function into 1st class script wrapper
-- mReturn :: First-class m => (a -> b) -> m (a -> b)
-- A script object is handed back unchanged; anything else is stored as the
-- |λ| property of a fresh script object so it can be called as wrapper's |λ|().
on mReturn(f)
    if class of f is script then return f
    script wrapper
        property |λ| : f
    end script
    return wrapper
end mReturn
JS for Automation (JXA) versions
(() => {
'use strict';
ObjC.import('AppKit');
// GENERIC FUNCTIONS --------------------------------------
// Left :: a -> Either a b
// Failure case of an Either: the payload (typically an error message)
// is stored under the `Left` key.
const Left = x => {
    return {
        type: 'Either',
        Left: x
    };
};
// Right :: b -> Either a b
// Success case of an Either: the payload is stored under the `Right` key.
const Right = x => {
    return {
        type: 'Either',
        Right: x
    };
};
// bindLR (>>=) :: Either a -> (a -> Either b) -> Either b
// Applies mf to the wrapped value when m has a defined `Right` field;
// any other value is returned unchanged.
const bindLR = (m, mf) => {
    if (m.Right === undefined) {
        return m;
    }
    return mf(m.Right);
};
// HTML <-> RTF ---------------------------------------------------------
// htmlFromRTF :: [String] -> String -> Either String String
//
// Converts RTF source text to HTML source text via NSAttributedString.
//   exceptTags : array of HTML tag names to leave out of the output
//   strRTF     : the RTF source string
// Returns Left(message) on failure, Right(htmlString) on success.
const htmlFromRTF = (exceptTags, strRTF) => {
    const
        attStr = $.NSAttributedString.alloc
        .initWithRTFDocumentAttributes(
            $(strRTF).dataUsingEncoding($.NSUTF8StringEncoding),
            0
        );
    return bindLR(
        // A usable attributed string exposes the export method as a function;
        // the result of a failed parse does not.
        typeof attStr
        .dataFromRangeDocumentAttributesError !== 'function' ? (
            Left('String could not be parsed as RTF')
        ) : Right(attStr),
        // Function bound if Right value obtained above:
        rtfAS => {
            const error = $();
            const htmlData = rtfAS
                .dataFromRangeDocumentAttributesError({
                        'location': 0,
                        'length': rtfAS.length
                    }, {
                        DocumentType: 'NSHTML',
                        ExcludedElements: exceptTags
                    },
                    error
                );
            if (!ObjC.unwrap(htmlData) || error.code) {
                // Fall back to a fixed message so that a failure never
                // carries an undefined description.
                return Left(
                    ObjC.unwrap(error.localizedDescription) ||
                    'Attributed string could not be converted to HTML'
                );
            }
            const strHTML = ObjC.unwrap(
                $.NSString.alloc.initWithDataEncoding(
                    htmlData,
                    $.NSUTF8StringEncoding
                )
            );
            // A failed decode unwraps to a non-string; Right(undefined)
            // would read downstream as a failure with no message at all.
            return typeof strHTML === 'string' ? (
                Right(strHTML)
            ) : Left('HTML data could not be decoded as UTF-8');
        }
    );
};
// rtfFromHTML :: String -> Either String String
//
// Converts HTML source text to RTF source text via NSAttributedString.
//   strHTML : the HTML source string
// Returns Left(message) on failure, Right(rtfString) on success.
const rtfFromHTML = strHTML => {
    const
        attStr = $.NSAttributedString.alloc
        // The bytes supplied are always UTF-8 (encoded just below), so the
        // importer is told so explicitly; left to guess, it garbles
        // non-ASCII text whenever the markup has no charset declaration.
        .initWithHTMLOptionsDocumentAttributes(
            $(strHTML).dataUsingEncoding($.NSUTF8StringEncoding), {
                CharacterEncoding: $.NSUTF8StringEncoding
            },
            0
        );
    return bindLR(
        // A usable attributed string exposes the export method as a function;
        // the result of a failed parse does not.
        typeof attStr
        .dataFromRangeDocumentAttributesError !== 'function' ? (
            Left('String could not be parsed as HTML')
        ) : Right(attStr),
        // Function bound if Right value obtained above:
        htmlAS => {
            const error = $();
            const rtfData = htmlAS
                .dataFromRangeDocumentAttributesError({
                        'location': 0,
                        'length': htmlAS.length
                    }, {
                        DocumentType: 'NSRTF'
                    },
                    error
                );
            if (!ObjC.unwrap(rtfData) || error.code) {
                // Fall back to a fixed message so that a failure never
                // carries an undefined description.
                return Left(
                    ObjC.unwrap(error.localizedDescription) ||
                    'Attributed string could not be converted to RTF'
                );
            }
            const strRTF = ObjC.unwrap(
                $.NSString.alloc.initWithDataEncoding(
                    rtfData,
                    $.NSUTF8StringEncoding
                )
            );
            // A failed decode unwraps to a non-string; Right(undefined)
            // would read downstream as a failure with no message at all.
            return typeof strRTF === 'string' ? (
                Right(strRTF)
            ) : Left('RTF data could not be decoded as UTF-8');
        }
    );
};
// TEST ------------------------------------------------------------------
return bindLR(
htmlFromRTF(['doctype', 'html', 'body', 'xml',
'style', 'p', 'head', 'span'
],
//"spem in Allium <>" // example of ill-formed RTF
"{\\rtf1\\ansi\\ansicpg1252\\cocoartf1504\\cocoasubrtf830\n{\\fonttbl\\f0\\fswiss\\fcharset0 Helvetica;}\n{\\colortbl;\\red255\\green255\\blue255;}\n{\\*\\expandedcolortbl;;}\n\\pard\\tx560\\tx1120\\tx1680\\tx2240\\tx2800\\tx3360\\tx3920\\tx4480\\tx5040\\tx5600\\tx6160\\tx6720\\qc\\partightenfactor0\n\n\\f0\\fs32 \\cf0 \\expnd0\\expndtw0\\kerning0\nGamma}"
),
// Applied to any Right value obtained above (skipped if Left):
rtfFromHTML
);
})();