Every item of a list whose

NigelGarvey · August 9, 2018, 10:32pm

Here’s a quasi ‘where’ filter handler for AS lists. The predicate can be presented in various ways for convenience. Demos at the bottom of the script.

(*
	Handler:  filterList:forInstance:ofClass:|where|: — Quasi 'whose'/'where' filter for lists.

	filterList:
		The list from which to derive the filtered result.
		
	forInstance:
		Either (1) an integer denoting the instance number of the match to return;
		or (2) a list of two integers, indicating the range of matching instances to return;
		or (3) an empty list, signifying that every matching item should be returned.
		Values which are coercible to integer may be used instead of actual integers.
		
	ofClass:
		A class keyword for the class of item to match. It will need to be parenthesised if coded directly into the call.
		
	|where|:
		Either (1) a script object containing an isMatch(a) handler which tests a passed item against the required 'whose'/'where' condition(s);
		or (2) text containing source code for an AS predicate which might come after 'where' in a real filter. eg.:
			"it is 7"
			"(item 4 is 5) and ((item 2 begins with \"z\") or (item 7 is {|name|:\"Fred\", age:109}))"
		or: (3) a list containing text(s) and/or {text, actual value} list(s) from which such a predicate can be constructed.
			{"it is 7"}
			{"(item 4 is 5)", "and", "((item 2 begins with \"z\")", "or", "(item 7 is {|name|:\"Fred\", age:109}))"}
			{{"item 4 is", 5}, "and (", {"item 2 begins with", "z"}, "or", {"item 7 is", {|name|:"Fred", age:109}}, ")"}
			
	Result:
		The requested match(es) if fully achievable. Otherwise an error.
*)

-- Quasi 'whose'/'where' filter for AppleScript lists.
--   filterList:  the list from which to derive the filtered result.
--   forInstance: an integer (or a value coercible to one) for the nth match, negative values counting from the end;
--                or a list of two such values for a range of matches; or {} for every match.
--   ofClass:     the class of item to match. (item) matches any class; (number) matches both integers and reals.
--   |where|:     a script object with an isMatch(a) handler; or predicate source text; or a list of
--                texts and/or {text, value} pairs from which the predicate source is assembled.
-- Result: the requested match(es).
-- Errors: a "bad forInstance: parameter" or "bad |where|: parameter" error for malformed arguments;
--         error number -1728 if the requested instance or range of matches doesn't exist.
on filterList:theList forInstance:n ofClass:requiredClass |where|:whereConditions
	-- Original and result lists in a script object for speed of access.
	script o
		property originalList : theList
		property matchedItems : {}
	end script
	
	-- Every malformed |where|: value is reported with this one message, wherever it's detected.
	set badWhereMessage to "filterList:forInstance:ofClass:|where|: : bad |where|: parameter."
	
	-- Analyse the instance parameter. Indexed single match, range, or every?
	try
		set singleMatchWanted to (class of n is not list)
		if (singleMatchWanted) then
			-- If it's not a list, make sure it's a non-zero integer.
			set n to n as integer
			if (n is 0) then error
			-- Keep the instance number as requested so that a later error message can quote it unchanged.
			set requestedInstance to n
			-- If it's negative, it's convenient to reverse the source list and use a positive index.
			if (n < 0) then
				set o's originalList to reverse of o's originalList
				set n to -n
			end if
		else if (n is not {}) then
			-- If it's a non-empty list, check that it only contains two items and derive non-zero integers from both of them.
			if ((count n) is not 2) then error
			set {n1, n2} to {beginning of n as integer, end of n as integer}
			if ((n1 is 0) or (n2 is 0)) then error
		end if
	on error
		error "filterList:forInstance:ofClass:|where|: : bad forInstance: parameter."
	end try
	
	-- Analyse the 'whose'/'where' conditions.
	-- NOTE: text and list forms are compiled and executed as AppleScript source by 'run script',
	-- so they should only ever come from a trusted source.
	set classOfWhereConditions to class of whereConditions
	if (classOfWhereConditions is script) then
		-- If they're supplied as a script object, use that to match values.
		set matcher to whereConditions
	else if (classOfWhereConditions is text) then
		-- If as a line of text, insert it into the source code for a script-object-creating script (!) and run the script.
		set matcher to (run script ¬
			("on run
				script
					on isMatch(a)
						tell a to return (" & whereConditions & ")
					end
				end
				return result
			end"))
	else if (classOfWhereConditions is list) then
		-- If as a list, assemble the source code for a script-object-creating script from its contents. This script will take a parameter list.
		set matcherCode to "on run argv" & linefeed & "script" & linefeed & "on isMatch(a)" & linefeed & "tell a to return ("
		set argv to {}
		set argvCount to 0
		repeat with i from 1 to (count whereConditions)
			set thisFragment to item i of whereConditions
			set classOfThisFragment to class of thisFragment
			if (classOfThisFragment is text) then
				-- Append items which are simply text to the source code.
				set matcherCode to matcherCode & " " & thisFragment
			else if ((classOfThisFragment is list) and ((count thisFragment) is 2)) then
				-- With items which are two-item lists containing a text and another value, append the text to the source code along with an index reference into the parameter list and add the other item to that list.
				set fragmentCode to beginning of thisFragment
				-- The first item of the pair must be the predicate text.
				if (class of fragmentCode is not text) then error badWhereMessage
				set argvCount to argvCount + 1
				set matcherCode to matcherCode & (" (" & fragmentCode & " (item " & argvCount & " of argv))")
				set end of argv to end of thisFragment
			else
				-- Neither text nor a two-item list.
				error badWhereMessage
			end if
		end repeat
		set matcherCode to matcherCode & (")" & linefeed & "end" & linefeed & "end" & linefeed & "return result" & linefeed & "end")
		-- Run the created source code to get the script object.
		set matcher to (run script matcherCode with parameters argv)
	else
		error badWhereMessage
	end if
	
	-- Work through the original list, testing each item for the required class and, where that matches, for the 'where' conditions.
	set matchCount to 0
	repeat with i from 1 to (count o's originalList)
		set thisItem to item i of o's originalList
		set classOfThisItem to class of thisItem
		if (((classOfThisItem is requiredClass) or (requiredClass is item) or ((requiredClass is number) and ((classOfThisItem is integer) or (classOfThisItem is real)))) and (matcher's isMatch(thisItem))) then
			set matchCount to matchCount + 1
			if (singleMatchWanted) then
				-- If returning a single match, simply return it when it shows up.
				if (matchCount = n) then return thisItem
			else
				-- Otherwise add all matches to the matched items list.
				set end of o's matchedItems to thisItem
			end if
		end if
	end repeat
	if (singleMatchWanted) then
		-- An nth match wasn't returned above. Quote the instance number as the caller gave it, not the internally negated one.
		error "filterList:forInstance:ofClass:|where|: : Can't get instance " & requestedInstance & " of " & matchCount & " matches." number -1728
	else if (n is {}) then
		-- Return every matched item (if any).
		return o's matchedItems
	else
		-- Try to return the requested range of matched items.
		try
			return items n1 thru n2 of o's matchedItems
		on error
			error "filterList:forInstance:ofClass:|where|: : Can't get instances " & n1 & " thru " & n2 & " of " & matchCount & " matches." number -1728
		end try
	end if
end filterList:forInstance:ofClass:|where|:


(* Demos: *)

-- A list containing some four-item lists and a few records and numbers.
-- The four-item lists have a title text as item 2 and an integer as item 4; items 1 and 3 are missing value.
-- The records have |name| and age properties. The numbers are a mix of integers and reals.
set listOfMixedItems to {{missing value, "The Great Escape", missing value, 3}, {missing value, "Attack of the Killer Aardvark", missing value, 2}, {|name|:"Fred", age:27}, {missing value, "The Framework Foundation", missing value, 1}, {missing value, "Bourne Again", missing value, 3}, -5, {|name|:"Fred", age:109}, {|name|:"Bert", age:74}, 99, {missing value, "Harry Potter and the Finder Script", missing value, 3}, 75, {|name|:"Fred", age:45}, 75.0, {missing value, "The Ed Stockly Story", missing value, 5}, 5, {missing value, "Star Wars Episode CCXIV: The Empire Goes Bankrupt", missing value, 1}, {missing value, "Colonel Panic", missing value, 1}, {missing value, "The Bucket Dictionary", missing value, 1}, 2.4}

-- To get the first list of listOfMixedItems where ((item 4 is 1) and ((item 2 contains "the") or (item 2 contains "Panic"))),
-- make the filterList: parameter listOfMixedItems, the forInstance: parameter 1, the ofClass: parameter (list),
-- and the |where|: parameter either a script object with an isMatch() handler performing the relevant AS 'whose/where' predicate …
script theseConditionAreMet
	-- Matcher for the demos: true when the candidate's fourth item is 1 and its second item contains "the" or "Panic".
	on isMatch(candidate)
		return ((candidate's item 4 is 1) and ((candidate's item 2 contains "the") or (candidate's item 2 contains "Panic")))
	end isMatch
end script
-- With the demo data, each of the next four calls should return the "The Framework Foundation" list.
my filterList:listOfMixedItems forInstance:1 ofClass:(list) |where|:theseConditionAreMet
-- … or a text version of the predicate …
my filterList:listOfMixedItems forInstance:1 ofClass:(list) |where|:"(item 4 is 1) and ((item 2 contains \"the\") or (item 2 contains \"Panic\"))"
-- … or a list containing a text version of the predicate or the parts thereof …
my filterList:listOfMixedItems forInstance:1 ofClass:(list) |where|:{"(item 4 is 1)", "and", "((item 2 contains \"the\") or (item 2 contains \"Panic\"))"}
-- … or a list containing similar text(s) and/or lists representing templates for individual conditions, each of these containing a single predicate text and an actual value.
-- ("panic" is lower-case here; AppleScript text comparisons ignore case unless made in a 'considering case' block.)
set {int, w1, w2} to {1, "the", "panic"}
my filterList:listOfMixedItems forInstance:1 ofClass:(list) |where|:{{"item 4 is", int}, "and (", {"item 2 contains", w1}, "or", {"item 2 contains", w2}, ")"}

-- To get the last list which matches the conditions, make the forInstance: value -1.
-- With the demo data this should be the "The Bucket Dictionary" list.
my filterList:listOfMixedItems forInstance:-1 ofClass:(list) |where|:theseConditionAreMet

-- For a range of matching lists, say matches 1 thru 2, make the forInstance: value a list containing two integers.
my filterList:listOfMixedItems forInstance:{1, 2} ofClass:(list) |where|:theseConditionAreMet

-- To get every matching list, make the forInstance: value an empty list.
-- With the demo data there are four matches.
my filterList:listOfMixedItems forInstance:{} ofClass:(list) |where|:theseConditionAreMet

(* Other examples: *)

-- Every number of listOfMixedItems where ((its class is real) or (it > 10)).
-- With the demo data this should be {99, 75, 75.0, 2.4}.
my filterList:listOfMixedItems forInstance:{} ofClass:(number) |where|:{{"its class is ", real}, "or", {"it >", 10}}

-- The last two records of listOfMixedItems where ((its |name| is "Fred") and (its age < 80))
-- With the demo data these should be the Fred records with ages 27 and 45.
my filterList:listOfMixedItems forInstance:{-2, -1} ofClass:(record) |where|:{{"its |name| is", "Fred"}, "and", {"its age <", 80}}

It’s also possible to filter lists of application objects, but it may depend on the application. The class parameter must be specific rather than generic (eg. document file rather than just file in the Finder) or it can be sidelined altogether by passing (item).

-- A list of Finder items.
-- Every item of the desktop is fetched in one go; the filtering is then done on this list rather than by the Finder.
tell application "Finder" to set l to items of desktop

-- Get every document file of the list whose name extension begins with "scpt"
-- Either:
script classIsDocumentFileAndNameExtensionBeginsWithScpt
	-- Matcher for Finder items: true when the item's name extension begins with "scpt".
	-- The class test itself is left to the ofClass: parameter of the filter handler.
	on isMatch(finderItem)
		-- Ask the Finder for the extension, then do the text comparison.
		tell application "Finder" to set itemExtension to finderItem's name extension
		return (itemExtension begins with "scpt")
	end isMatch
end script
-- The 'using terms from' block only serves to compile the Finder class name 'document file' in the ofClass: parameter.
using terms from application "Finder" -- For 'document file'.
	my filterList:l forInstance:{} ofClass:(document file) |where|:classIsDocumentFileAndNameExtensionBeginsWithScpt
end using terms from

-- Or some variation on:
-- Here the predicate is given as a {text, value} pair instead of a script object.
using terms from application "Finder" -- For 'document file'.
	my filterList:l forInstance:{} ofClass:(document file) |where|:{{"its name extension begins with", "scpt"}}
end using terms from

The above demo, which fetches every item of the desktop and then sorts out the relevant document files twice for demo purposes, takes only half as long on my machine as the standard Finder method executed just once!

-- The standard method, for comparison: the Finder applies the 'whose' filter itself.
tell application "Finder" to set l to (document files of desktop) whose name extension begins with "scpt"

estockly · August 10, 2018, 1:03am

My data breaks this script in ScriptDebugger. I get an internal table overflow here:

my filterList:listOfMixedItems forInstance:1 ofClass:(list) |where|:theseConditionAreMet

My data is about 2900 records each with between 13 and 19 fields

Interesting, in Script Editor it gets past this roadblock.

NigelGarvey · August 10, 2018, 7:39am

It’s hard to tell what’s happening from that one line. If your list contains records, the ofClass: parameter should be (record) and the code represented by theseConditionAreMet should compare some known property or properties of a record against some value or values. If theseConditionAreMet is a script object, it might be something like …

-- First form: the record property is addressed directly through the handler's parameter.
script theseConditionAreMet
	-- Returns true when the passed record's |title| is "The Great Escape".
	on isMatch(a)
		return (a's |title| is "The Great Escape")
	end isMatch
end script

… or …

-- Second form: the record is the target of a 'tell', so the property is addressed with 'its'.
script theseConditionAreMet
	-- Returns true when the passed record's |title| is "The Great Escape".
	on isMatch(a)
		tell a to return (its |title| is "The Great Escape")
	end isMatch
end script

If the |where|: parameter is text or a list of text, the script generates its own script object in the second form above and the text should represent whatever comes after return, including an its before each property label.

estockly · August 10, 2018, 4:16pm

My list is pure text, even the numbers are text.

But maybe this should be filed as an SD bug report? That line executes just fine in Script Editor, but I get the “Internal Table Overflow” error in Script Debugger.

FWIW, your script is exactly what I need, plus a lot of things I don’t need. Thanks!

My lists are pure text, nothing else. I’m trying to modify your script to handle text only comparisons, but even that seems pretty complex.

It seems the big speed boost comes from using the script object?

The overview is I have a file that contains every show airing on TV in 24 hours and I work with seven of those files for a week. One file contains around 3k shows.

From that I’m getting a list of all the movies airing.

Since some movies air multiple times and on multiple channels, I need to consolidate all the channels/times for each movie.

Plus, since there are a number of movies with exactly the same title, I need to identify the movie by title/year of release. (They say there actually is one case where two films with the same name were released the same year, but I haven’t encountered that, AFAIK).

My script reads the pipe/return delimited data; converts it to lists of lists and extracts all the data labeled “MOVIE”; while it’s doing that it extracts a separate list of every movie TITLE/YEAR.

Then (here’s where your script would come in) it goes through the list of titles/years and makes a new list of every item that has the same title/year. It goes through that list and consolidates the channel date information.

Then a new list is created with the consolidated titles. From that various lists are extracted.

The final product would be:

The Great Escape (1963) Steve McQueen, James Garner and Richard Attenborough. Allied prisoners in a German POW camp plot and execute an elaborate escape. HBO Mon. 8 and 11 p.m. Tues., 11 a.m.; Fox Sat 2 p.m.

This data is then used to create an alphabetical listing; four star films; box office hits.

NigelGarvey · August 10, 2018, 7:04pm

So if I’ve got this right, you have what’s essentially a CSV file, but with bars instead of commas. You’re turning it into a list of lists of text, each sublist corresponding to a movie being shown and each text in that sublist being a particular detail about the movie, corresponding to a column in the original data.

It’s after that that I’m not clear. You next want to group sublists having similar TITLE/YEAR entries. Is that right? It would be faster and simpler just to sort the whole list of lists on those entries than to extract a separate list of TITLE/YEAR entries and do separate searches for every list whose TITLE/YEAR matches each item in it. I have a sort handler which can do this, but I don’t want to waste your time with it if I’ve misunderstood what you’re trying to do! It can do subsorts on other columns at the same time if required, which might be what you’d need for your final listing, or if TITLE and YEAR are in fact separate columns.

estockly · August 10, 2018, 10:06pm

This is what the final output looks like. It’s basically four lists: Four Star movies; Box Office Hits; Indies and imports; and movies A-Z. http://www.latimes.com/entertainment/tv/showtracker/la-et-tv-movies-htmlstory.html

Here’s what the raw data looks like.

6:00 am|(AUD)|MOVIE|Dirty Dancing 2: Havana Nights|HH|||(2004)|Diego Luna, Romola Garai.|Love blossoms between a young Cuban and an American teenager as they prepare for a New Year's Eve dance contest.|||(PG-13)|1 hr. 26 mins.|||
6:00 am|(AXS)|LOG|The Big Interview With Dan Rather|John Fogerty|John Fogerty of Creedence Clearwater Revival looks back on his illustrious career.|||(TV14)|(s)|(cc)|||1 hr.|
6:00 am|(BBCA)|LOG|Dirk Gently's Holistic Detective Agency|Shapes and Colors|The son of the dead couple shows up; Amanda goes to a mystical coven in the woods of Wendimoor, where she masters her unique powers.|||(TVMA)||(cc)|||1 hr.|

Basically, yes. I’m extracting all the |MOVIE| records from the list and just working with those.

That’s actually what I’m doing now. I’m adding a “movieID” field to each item that includes title/year. Then sorting, then going through the sorted list combining the channels/times.

They are separate columns in the raw data, and it would probably save time to not have to add an item to the record.

NigelGarvey · August 11, 2018, 12:35am

“Unfortunately, our website is currently unavailable in most European countries. We are engaged on the issue and committed to looking at options that support our full range of digital offerings to the EU market. We continue to identify technical compliance solutions that will provide all readers with our award-winning journalism.”

But there’s enough in your example to go on.

My customisable sort is here. It’s a lot of code, but fast. It can be saved as a library script (leaving out the demo handlers at the bottom, if you like), say with the name “Custom Iterative Dual-pivot Quicksort.scpt”.

My thought is that you turn the entire data into a list of lists the way you’re probably doing:

-- Turn the bar-delimited text in rawData into a list of lists: one sublist of field texts per non-empty line.
-- Blank lines (such as the one produced by a trailing line break) are skipped, as they'd otherwise become
-- one-item {""} sublists and break any later access to a particular field by index.
set rawLines to paragraphs of rawData
set listOfLists to {}
set astid to AppleScript's text item delimiters
set AppleScript's text item delimiters to "|"
try
	repeat with i from 1 to (count rawLines)
		set thisLine to item i of rawLines
		if (thisLine is not "") then set end of listOfLists to thisLine's text items
	end repeat
on error errMsg number errNum
	-- Don't leave the delimiters changed if anything goes wrong.
	set AppleScript's text item delimiters to astid
	error errMsg number errNum
end try
set AppleScript's text item delimiters to astid

Then sort the list using this code:

-- Sort-customising script object.
-- Compares lists from a list of lists such that
-- lists whose third item is "MOVIE" are grouped at the beginning of the main list and are sorted by title (item 4) and subsorted by year (item 8).
script moviesFirstByNameThenYear
	-- Determine whether or not list a should be moved to after list b.
	-- Returns true if a should go after b, false otherwise.
	on isGreater(a, b)
		if (item 3 of b is "MOVIE") then
			if (item 3 of a is "MOVIE") then
				-- a and b are both "MOVIE" lists. Get their titles.
				set titleA to item 4 of a
				set titleB to item 4 of b
				if (titleA = titleB) then
					-- The titles are the same. Does a's year come after b's?
					return (item 8 of a comes after item 8 of b)
				else
					-- The titles are different. Does a's come after b's?
					return (titleA comes after titleB)
				end if
			else
				-- b is a movie and a isn't, so a should go after b.
				return true
			end if
		else
			-- b isn't a movie, so it doesn't matter what a is. They can stay as they are.
			return false
		end if
	end isGreater
end script

-- Sort the whole of listOfLists (items 1 thru -1) with the library script's sort handler, passing the script object above as its comparer.
tell script "Custom Iterative Dual-pivot Quicksort" to sort(listOfLists, 1, -1, {comparer:moviesFirstByNameThenYear})

At this point, listOfLists is sorted with the “MOVIE” lists at the beginning, sorted by name and subsorted by year. The order of the non-movie lists is undefined. You can work through from the beginning, deriving the text you need and stopping when you reach the first non-movie.

NigelGarvey · August 11, 2018, 7:29am

I wrote:

script moviesFirstByNameThenYear
	-- Determine whether or not list a should be moved to after list b.
	-- Returns true if a should go after b, false otherwise.
	on isGreater(a, b)
		if (item 3 of b is "MOVIE") then
			if (item 3 of a is "MOVIE") then
				-- a and b are both "MOVIE" lists. Get their titles.
				set titleA to item 4 of a
				set titleB to item 4 of b
				if (titleA = titleB) then
					-- The titles are the same. Does a's year come after b's?
					return (item 8 of a comes after item 8 of b)
				else
					-- The titles are different. Does a's come after b's?
					return (titleA comes after titleB)
				end if
			else
				-- b is a movie and a isn't, so a should go after b.
				return true
			end if
		else
			-- b isn't a movie, so it doesn't matter what a is. They can stay as they are.
			return false
		end if
	end isGreater
end script

This can be flattened slightly to:

script moviesFirstByNameThenYear
	-- Determine whether or not list a should be moved to after list b.
	-- Returns true if a should go after b, false otherwise.
	on isGreater(a, b)
		if (item 3 of b is "MOVIE") then
			if (item 3 of a is "MOVIE") then
				-- a and b are both "MOVIE" lists. Get their titles.
				set titleA to item 4 of a
				set titleB to item 4 of b
				-- Return whether (the titles are the same and a's year comes after b's) or (a's title comes after b's).
				return (((titleA = titleB) and (item 8 of a comes after item 8 of b)) or (titleA comes after titleB))
			else
				-- b is a movie and a isn't, so a should go after b.
				return true
			end if
		else
			-- b isn't a movie, so it doesn't matter what a is. They can stay as they are.
			return false
		end if
	end isGreater
end script

Or with a slight loss of performance due to the negative comparison and potentially repeated list accesses:

script moviesFirstByNameThenYear
	-- Determine whether or not list a should be moved to after list b.
	-- Single-expression form: true only if b is a "MOVIE" list and either a isn't one,
	-- or a's title (item 4) equals b's and a's year (item 8) comes after b's, or a's title comes after b's.
	on isGreater(a, b)
		return ((item 3 of b is "MOVIE") and ((item 3 of a is not "MOVIE") or (((item 4 of a = item 4 of b) and (item 8 of a comes after item 8 of b)) or (item 4 of a comes after item 4 of b))))
	end isGreater
end script