Jump to content

Module:data consistency check

Sabdkosh se

Documentation for this module may be created at Module:data consistency check/doc

local export = {}

local m_language_data = require("Module:languages/alldata")
local m_language_codes = require('Module:languages/code to canonical name')
local m_language_canonical_names = require('Module:languages/canonical names')
local m_etym_language_data = require("Module:etymology languages/data")
local m_family_data = require('Module:families/data')
local m_script_data = require('Module:scripts/data')

local m_table = require("Module:table")
local Array = require("Module:array")

-- Per-module message lists; assigned by export.do_checks and cleared when it returns.
local messages

-- Records one discrepancy for the data module `modname`.
-- The remaining arguments are passed to string.format. If formatting or
-- insertion raises an error, the error and the arguments are logged with
-- mw.log instead of being propagated.
local function discrepancy(modname, ...)
	local function record(...)
		-- Look the list up first so it exists even when formatting fails.
		local list = messages[modname]
		list:insert(string.format(...))
	end
	
	local succeeded, failure = pcall(record, ...)
	if succeeded then
		return
	end
	mw.log(failure, ...)
end

-- Maps each code seen so far to the name of the data module that defined it.
local all_codes = {}

-- Canonical name -> code, filled in as languages, families and scripts are checked.
local language_names = {}
local family_names = {}
local script_names = {}

-- Family codes named as the family or parent of a language, etymology language or family.
local nonempty_families = {}
-- Family codes exempt from the "has no child families or languages" report.
local allowed_empty_families = {tbq = true}
-- Script codes listed by at least one language.
local nonempty_scripts = {}
	
-- Returns a wikilink to the category of the language called `name`.
-- " language" is appended unless the name already ends in "language" or
-- "Language". A missing name yields "???".
local function link(name)
	if not name then
		return "???"
	end
	
	local title = name
	if not title:find("[Ll]anguage$") then
		title = title .. " language"
	end
	return "[[:Category:" .. title .. "|" .. title .. "]]"
end
	
-- Returns a wikilink to the category of the script called `name`.
-- Names ending in "code" or "semaphore" link to a category titled with the
-- name itself (first lowercase letter capitalized); every other name gets
-- " script" appended. A missing name yields "???".
local function link_script(name)
	if not name then
		return "???"
	end
	
	local is_code_like = name:find("[Cc]ode$") or name:find("[Ss]emaphore$")
	if is_code_like then
		-- Only the first result of gsub (the string) is wanted.
		local category = name:gsub("^%l", string.upper)
		return "[[:Category:" .. category .. "|" .. name .. "]]"
	end
	
	local title = name .. " script"
	return "[[:Category:" .. title .. "|" .. title .. "]]"
end

-- Reports the keys in `invalid_keys` (an Array) as invalid for the entry
-- `code` of `modname`. `is_script` selects link_script rather than link for
-- rendering the entry's name.
local function invalid_keys_message(modname, code, data, invalid_keys, is_script)
	local several = #invalid_keys ~= 1
	
	local key_list = invalid_keys
		:map(function(key)
			return '<code>' .. key .. '</code>'
		end)
		:concat(", ")
	
	local make_link = is_script and link_script or link
	local name_link = make_link(data.canonicalName or data[1])
	
	discrepancy(modname, "The data key%s %s for %s (<code>%s</code>) %s invalid.",
		several and "s" or "",
		key_list,
		name_link,
		code,
		several and "are" or "is")
end

-- Builds a checker for the keys of a data table.
-- `valid_keys` is the list of permitted keys; `is_script` is forwarded to
-- invalid_keys_message. The returned function takes (modname, code, data)
-- and reports every key of `data` that is not permitted.
local function check_data_keys(valid_keys, is_script)
	local allowed = Array(valid_keys):to_set()
	
	local function checker(modname, code, data)
		local unknown
		for key in pairs(data) do
			if not allowed[key] then
				if not unknown then
					unknown = Array()
				end
				unknown:insert(key)
			end
		end
		
		if unknown then
			invalid_keys_message(modname, code, data, unknown, is_script)
		end
	end
	
	return checker
end

-- Modification of isArray in [[Module:table]].
-- Returns the first index in 1..n (n being the number of keys in `t`) that
-- holds nil, or nothing if there is no such index.
local function find_gap(t)
	local key_count = 0
	for _ in pairs(t) do
		key_count = key_count + 1
	end
	
	for index = 1, key_count do
		if t[index] == nil then
			return index
		end
	end
end

-- Reports data[field_name] when it is anything other than nil or true.
--   modname:    data module the entry comes from
--   code:       code of the entry
--   data:       the entry's data table
--   field_name: key of the field to check
local function check_true_or_nil(modname, code, data, field_name)
	local value = data[field_name]
	if value == nil or value == true then
		return
	end
	
	-- The name is read from the data table and the offending value is the one
	-- stored under field_name, so the report matches the field being checked.
	discrepancy(modname,
		"%s (<code>%s</code>) has an <code>%s</code> value that is not <code>nil</code> or <code>true</code>: %s",
		link(data[1] or data.canonicalName), code, field_name,
		tostring(value)
	)
end

-- Checks that the field `array_name` is a table without gaps.
-- With `subarray_name`, the field is looked up in data[subarray_name]
-- (which must exist) instead of in `data` itself.
-- Returns true when the field is a gap-free table; otherwise a discrepancy
-- is recorded and nothing is returned.
local function check_array(modname, code, data, array_name, subarray_name)
	local container = data
	if subarray_name then
		container = assert(data[subarray_name], subarray_name)
	end
	
	-- Text locating the field, built only when a message is needed.
	local function where()
		return subarray_name and "the " .. subarray_name .. " field in " or ""
	end
	
	local kind = type(container[array_name])
	if kind ~= "table" then
		discrepancy(modname, "The %s field in %sthe data table for %s (<code>%s</code>) should be an array (table) but is %s.",
			array_name,
			where(),
			data.canonicalName or data[1],
			code,
			kind == "nil" and "nil" or "a " .. kind)
		return
	end
	
	local gap = find_gap(container[array_name])
	if not gap then
		return true
	end
	
	discrepancy(modname, "The %s array in %sthe data table for %s (<code>%s</code>) has a gap at index %d.",
		array_name,
		where(),
		data.canonicalName or data[1],
		code, gap)
end

-- Checks the Wikidata item id stored in data[key].
-- A number must be a positive integer; a string must look like "Q" followed
-- by digits. Values of any other type, including nil, are not reported.
local function check_wikidata_item(modname, code, data, key)
	local item = data[key]
	local item_type = type(item)
	
	if item_type == "number" then
		if m_table.isPositiveInteger(item) then
			return
		end
		discrepancy(modname, "%g, the Wikidata item id for %s (<code>%s</code>), is not a positive integer or a string in the correct format.",
			item, data.canonicalName or data[1], code)
	elseif item_type == "string" then
		if item:find("^Q%d+$") then
			return
		end
		discrepancy(modname, "%s, the Wikidata item id for %s (<code>%s</code>), is not a string in the correct format or a positive integer.",
			item, data.canonicalName or data[1], code)
	end
end

-- Checks the name list stored in data[data_key].
-- Reports gaps in the list, names equal to the canonical name, and names
-- that occur more than once. Nested tables of names are examined only when
-- `allow_nested` is set; otherwise they are reported.
local function check_other_names_or_aliases(modname, code, canonical_name, data, data_key, allow_nested)
	local list = data[data_key]
	if not list then
		return
	end
	check_array(modname, code, data, data_key)

	local seen = {}
	local function examine(candidate)
		if candidate == canonical_name then
			discrepancy(modname,
				"%s, the canonical name for <code>%s</code>, is repeated in the table of <code>%s</code>.",
				canonical_name, code, data_key)
		end
		if seen[candidate] then
			discrepancy(modname,
				"The name %s is found twice or more in the list of <code>%s</code> for %s (<code>%s</code>).",
				candidate, data_key, canonical_name, code)
		end
		seen[candidate] = true
	end

	for _, entry in ipairs(list) do
		if type(entry) ~= "table" then
			examine(entry)
		elseif allow_nested then
			for _, nested_name in ipairs(entry) do
				examine(nested_name)
			end
		else
			discrepancy(modname,
				"A nested table is found in the list of <code>%s</code> for %s (<code>%s</code>), but isn't allowed.",
				data_key, canonical_name, code)
		end
	end
end

-- Runs check_other_names_or_aliases on the otherNames, aliases and varieties
-- fields of `data`, in that order; only varieties may hold nested tables.
local function check_other_names_aliases_varieties(modname, code, canonical_name, data)
	local fields = {
		{ "otherNames" },
		{ "aliases" },
		{ "varieties", true },
	}
	
	for _, field in ipairs(fields) do
		local key, nesting_allowed = field[1], field[2]
		if data[key] then
			check_other_names_or_aliases(modname, code, canonical_name, data, key, nesting_allowed)
		end
	end
end

local get_codepoint = mw.ustring.codepoint

-- Validates a character-set pattern (the text that goes between "[" and "]").
--   pattern:       the pattern to validate
--   modname, code: data module and code of the entry, used in reports
--   data:          the entry's data table (the name is read from it)
--   standardChars: true when the pattern is a language's standardChars field,
--                  false/nil when it is a script's characters field
-- Reports a pattern that is not a string, ranges whose first code point is
-- not below the second, and patterns that mw.ustring.find rejects.
local function validate_pattern(pattern, modname, code, data, standardChars)
	local canonical_name = data[1] or data.canonicalName
	
	if type(pattern) ~= "string" then
		discrepancy(modname, '"%s", the %spattern for %s (<code>%s</code>), is not a string.',
			tostring(pattern), standardChars and 'standard character ' or '', link(canonical_name), code)
		-- The remaining checks need a string to work on.
		return
	end
	
	local bad_ranges = Array()
	for lower, higher in mw.ustring.gmatch(pattern, "(.)%-(.)") do
		if get_codepoint(lower) >= get_codepoint(higher) then
			bad_ranges:insert({ lower, higher })
		end
	end
	
	if bad_ranges[1] then
		local plural = #bad_ranges ~= 1 and "s" or ""
		discrepancy(modname, '%s (<code>%s</code>) specifies an invalid pattern ' ..
			'for %scharacter detection: <code>"%s"</code>. The first codepoint%s ' ..
			'in the range%s %s must be less than the second.',
			link(canonical_name), code, standardChars and 'standard ' or '', pattern, plural, plural,
			bad_ranges
				:map(
					function(range)
						return range[1] .. "-" .. range[2] .. (" (U+%X, U+%X)")
							:format(get_codepoint(range[1]), get_codepoint(range[2]))
					end)
				:concat(", "))
	end
	
	-- Let the pattern engine itself decide whether the set is well-formed.
	if not pcall(mw.ustring.find, "", "[" .. pattern .. "]") then
		discrepancy(modname, '%s (<code>%s</code>) specifies an invalid pattern for ' ..
			'%scharacter detection: <code>"%s"</code>',
			link(canonical_name), code, standardChars and 'standard ' or '', pattern)
	end
end

-- Code points in remove_exceptions sequences may not exceed
-- maximum_code_point - remove_exceptions_addition.
-- NOTE(review): presumably remove_exceptions_addition is added to these code
-- points by the code that consumes the data; confirm against that code.
local remove_exceptions_addition = 0xF0000
local maximum_code_point = 0x10FFFF
local remove_exceptions_maximum_code_point = maximum_code_point - remove_exceptions_addition

-- Validates the replacements stored in data[replacements_name].
--   modname, code:     data module and code of the entry, used in reports
--   data:              the entry's data table
--   replacements_name: key of the field to validate
-- A string value is accepted for "sort_key" and "entry_name" only. A table
-- value has its from/to arrays, remove_diacritics string and
-- remove_exceptions array checked. Any other value is reported.
local function check_entry_name_or_sortkey(modname, code, data, replacements_name)
	local canonical_name = data[1] or data.canonicalName
	
	local replacements = data[replacements_name]
	local replacements_type = type(replacements)
	if replacements_type == "string" then
		if not (replacements_name == "sort_key" or replacements_name == "entry_name") then
			discrepancy(modname, "The %s field in the data table for %s (<code>%s</code>) must be a table.",
				replacements_name, canonical_name, code)
		end
		return
	elseif replacements_type ~= "table" then
		-- Numbers, booleans etc. cannot be indexed below; report them instead.
		discrepancy(modname, "The %s field in the data table for %s (<code>%s</code>) must be a table.",
			replacements_name, canonical_name, code)
		return
	end
	
	if (replacements.from ~= nil) ~= (replacements.to ~= nil) then
		discrepancy(modname,
			"The <code>from</code> and <code>to</code> arrays in the <code>%s</code> table for %s (<code>%s</code>) are not both defined or both undefined.",
			replacements_name, canonical_name, code)
	elseif replacements.from then
		for _, key in ipairs { "from", "to" } do
			check_array(modname, code, data, key, replacements_name)
		end
	end
	
	if replacements.remove_diacritics and type(replacements.remove_diacritics) ~= "string" then
		discrepancy(modname,
			"The <code>remove_diacritics</code> field in the <code>%s</code> table for %s (<code>%s</code>) table must be a string.",
			replacements_name, canonical_name, code)
	end
	
	if replacements.remove_exceptions then
		if check_array(modname, code, data, "remove_exceptions", replacements_name) then
			for sequence_i, sequence in ipairs(replacements.remove_exceptions) do
				local code_point_i = 0
				for code_point in mw.ustring.gcodepoint(sequence) do
					code_point_i = code_point_i + 1
					if code_point > remove_exceptions_maximum_code_point then
						discrepancy(modname,
							"Code point #%d (0x%04X) in field #%d of the <code>remove_exceptions</code> array for %s (<code>%s</code>) is over U+%04X.",
							code_point_i, code_point, sequence_i, canonical_name, code, remove_exceptions_maximum_code_point)
					end
				end
			end
		end
	end
	
	-- The condition fires when `to` is longer than `from`, so the message
	-- names `to` as the array that is too long.
	if replacements.from and replacements.to
			and m_table.length(replacements.to) > m_table.length(replacements.from) then
		discrepancy(modname,
			"The <code>to</code> array in the <code>%s</code> table for %s (<code>%s</code>) must be shorter than or the same length as the <code>from</code> array.",
			replacements_name, canonical_name, code)
	end
end

-- Returns true if some language in m_language_data lists `parent_code`
-- among its ancestors, false otherwise.
local function has_regular_language_child(parent_code)
	local function lists_parent(lang_data)
		local ancestor_list = lang_data.ancestors
		if not ancestor_list then
			return false
		end
		for _, ancestor_code in pairs(ancestor_list) do
			if ancestor_code == parent_code then
				return true
			end
		end
		return false
	end
	
	for _, lang_data in pairs(m_language_data) do
		if lists_parent(lang_data) then
			return true
		end
	end
	return false
end

-- Validates the ancestors of a language or etymology language.
--   modname, code:          data module and code of the entry
--   data:                   the entry's data table
--   ancestors:              the list of ancestor codes to validate
--   is_etymology_language:  when true, also require that some regular
--                           language lists `code` as an ancestor
-- Reports a malformed ancestors array and every ancestor code that is
-- neither a language nor an etymology language.
local function check_ancestors(modname, code, data, ancestors, is_etymology_language)
	check_array(modname, code, data, "ancestors")
	
	local canonical_name = data[1] or data.canonicalName
	if is_etymology_language and not has_regular_language_child(code) then
		discrepancy(modname,
			"The etymology language %s (<code>%s</code>) has an <code>ancestors</code> field, "
			.. "but no regular languages list it as an ancestor.",
			link(canonical_name), code)
	end
	
	-- check_array has already reported a non-table value; iterating it with
	-- ipairs would raise an error and abort the whole run.
	if type(ancestors) ~= "table" then
		return
	end
	
	for _, ancestor_code in ipairs(ancestors) do
		if not (m_language_data[ancestor_code] or m_etym_language_data[ancestor_code]) then
			discrepancy(modname,
				"%s (<code>%s</code>) lists an invalid language code <code>%s</code> as ancestor.",
				link(canonical_name), code, ancestor_code)
		end
	end
end

-- Renders a code for use in a message. Strings are wrapped in <code> tags;
-- anything else is dumped with Module:debug, so that a non-string value in
-- the position of a code does not cause a module error.
local function show_family_code(code)
	if type(code) ~= "string" then
		return require("Module:debug").highlight_dump(code)
	end
	return "<code>" .. code .. "</code>"
end

-- Checks all language data modules (two-letter, three-letter and exceptional
-- codes), the code <-> canonical name lookup modules, and
-- [[Template:langname-lite]].
-- Fills all_codes, language_names, nonempty_families and nonempty_scripts as
-- a side effect. Returns "OK" once the template has been processed.
local function check_languages()
	local check_language_data_keys = check_data_keys{
		1, 2, 3, 4, -- canonical name, wikidata item, family, scripts
		"display_text", "entry_name", "sort_key",
		"otherNames", "aliases", "varieties",
		"type", "scripts", "ancestors",
		"wikimedia_codes", "wikipedia_article", "standardChars",
		"translit", "override_translit", "link_tr",
		"dotted_dotless_i"
	}
	
	-- Runs every per-language check on one entry. `exdata` is the matching
	-- entry of the extradata module, if there is one.
	local function check_language(modname, code, data, exdata)
		local canonical_name, lang_type = data[1], data.type
		
		check_language_data_keys(modname, code, data)
		
		if all_codes[code] then
			discrepancy(modname, "Code <code>%s</code> is not unique; it is also defined in [[Module:%s]].", code, all_codes[code])
		else
			if not m_language_codes[code] then
				discrepancy("languages/code to canonical name", "The code <code>%s</code> (%s) is missing.", code, canonical_name)
			end
			all_codes[code] = modname
		end
		
		if not canonical_name then
			discrepancy(modname, "Code <code>%s</code> has no canonical name specified.", code)
		elseif language_names[canonical_name] then
			discrepancy(modname,
				"%s (<code>%s</code>) has a canonical name that is not unique; it is also used by the code <code>%s</code>.",
				link(canonical_name), code, language_names[canonical_name])
		else
			if not m_language_canonical_names[canonical_name] then
				discrepancy("languages/canonical names", "The canonical name %s (<code>%s</code>) is missing.", canonical_name, code)
			end
			language_names[canonical_name] = code
		end
		
		check_wikidata_item(modname, code, data, 2)

		if exdata then
			check_other_names_aliases_varieties(modname, code, canonical_name, exdata)
		end
		
		if lang_type and not (lang_type == "regular" or lang_type == "reconstructed" or lang_type == "appendix-constructed") then
			discrepancy(modname, "%s (<code>%s</code>) is of an invalid type <code>%s</code>.", link(canonical_name), code, data.type)
		end
		
		if data.scripts and data[4] then
			discrepancy(modname, "%s (<code>%s</code>) has both <code>4</code> and <code>scripts</code>.", link(canonical_name), code)
		end
		
		local sc = data.scripts or data[4]
		if sc then
			if type(sc) == "table" then
				check_array(modname, code, data, data.scripts and "scripts" or 4)
				if not sc[1] then
					discrepancy(modname, "%s (<code>%s</code>) has no scripts listed.", link(canonical_name), code)
				else
					for _, sccode in ipairs(sc) do
						local cur_sc = m_script_data[sccode]
						if not cur_sc then
							discrepancy(modname,
								"%s (<code>%s</code>) lists an invalid script code <code>%s</code>.",
								link(canonical_name), code, sccode)
						-- elseif not cur_sc.characters then
						-- 	discrepancy(modname,
						-- 		"%s (<code>%s</code>) lists a script without characters <code>%s</code> (%s).",
						-- 		link(canonical_name), code, sccode, cur_sc.canonicalName)
						end
			
						nonempty_scripts[sccode] = true
					end
				end
			elseif type(sc) == "string" then
				if not m_script_data[sc] then
					-- Show the offending script value itself.
					discrepancy(modname,
						"%s (<code>%s</code>) has an invalid script code %s.",
						link(canonical_name), code, show_family_code(sc))
				end
				
				-- A script named by a string is in use just like one named in a list.
				nonempty_scripts[sc] = true
			else
				discrepancy(modname,
					"The %s field for %s (<code>%s</code>) must be a table or string.",
					data.scripts and "scripts" or 4, link(canonical_name), code)
			end
		end
		
		if data.ancestors then
			check_ancestors(modname, code, data, data.ancestors, false)
		end
		
		if data[3] then
			local family = data[3]
			if not m_family_data[family] then
				discrepancy(modname,
					"%s (<code>%s</code>) has an invalid family code %s.",
					link(canonical_name), code, show_family_code(family))
			end
			
			nonempty_families[family] = true
		end
		
		if data.sort_key then
			check_entry_name_or_sortkey(modname, code, data, "sort_key")
		end
		
		if data.entry_name then
			check_entry_name_or_sortkey(modname, code, data, "entry_name")
		end

		-- NOTE(review): "display" is not among the valid keys above
		-- ("display_text" is); confirm which field name is intended here.
		if data.display then
			check_entry_name_or_sortkey(modname, code, data, "display")
		end

		if data.standardChars then
			validate_pattern(data.standardChars, modname, code, data, true)
		end
		
		check_true_or_nil(modname, code, data, "override_translit")
		check_true_or_nil(modname, code, data, "link_tr")
		
		if data.override_translit and not data.translit then
			discrepancy(modname,
				"%s (<code>%s</code>) has <code>override_translit</code> set, but no transliteration module",
				link(canonical_name), code)
		end
	end
	
	-- Check two-letter codes
	local modname = "languages/data2"
	local data2 = require("Module:" .. modname)
	local extradata2 = require("Module:" .. modname:gsub("data", "extradata"))
	
	for code, data in pairs(data2) do
		if not code:find("^[a-z][a-z]$") then
			discrepancy(modname, '%s (<code>%s</code>) does not have a two-letter code.', link(data[1] or data.canonicalName), code)
		end
		
		check_language(modname, code, data, extradata2[code])
	end
	
	-- Check three-letter codes
	for i = string.byte('a'), string.byte('z') do
		local letter = string.char(i)
		local modname = "languages/data3/" .. letter
		local data3 = require("Module:" .. modname)
		local extradata3 = require("Module:" .. modname:gsub("data", "extradata"))
		local code_pattern = "^" .. letter .. "[a-z][a-z]$"
		
		for code, data in pairs(data3) do
			if not code:find(code_pattern) then
				discrepancy(modname,
					'%s (<code>%s</code>) does not have a three-letter code starting with "<code>%s</code>".',
					link(data[1] or data.canonicalName), code, letter)
			end
			
			check_language(modname, code, data, extradata3[code])
		end
	end
	
	-- Check exceptional codes
	modname = "languages/datax"
	local datax = require("Module:" .. modname)
	local extradatax = require("Module:" .. modname:gsub("data", "extradata"))
	
	for code, data in pairs(datax) do
		if code:find("^[a-z][a-z][a-z]?$") then
			discrepancy(modname, '%s (<code>%s</code>) has a two- or three-letter code.', link(data[1] or data.canonicalName), code)
		end
		
		check_language(modname, code, data, extradatax[code])
	end
	
	-- These checks must be done while all_codes only contains language codes:
	-- that is, after language data modules have been processed, but before
	-- etymology languages, families, and scripts have.
	local function check_code_and_name(modname, code, canonical_name)
		if not all_codes[code] then
			if not language_names[canonical_name] then
				discrepancy(modname,
					"The code <code>%s</code> and the canonical name %s should be removed; they are not found in a submodule of [[Module:languages]].",
					code, canonical_name)
			else
				discrepancy(modname,
					"<code>%s</code>, the code for the canonical name %s, is wrong; it should be <code>%s</code>.",
					code, canonical_name, language_names[canonical_name])
			end
		elseif not language_names[canonical_name] then
			local data_table = require("Module:" .. all_codes[code])[code]
			discrepancy(modname,
				"%s, the canonical name for the code <code>%s</code>, is wrong; it should be %s.",
				canonical_name, code, data_table[1] or data_table.canonicalName)
		end
	end
	
	for code, canonical_name in pairs(m_language_codes) do
		check_code_and_name("languages/code to canonical name", code, canonical_name)
	end
	
	for canonical_name, code in pairs(m_language_canonical_names) do
		check_code_and_name("languages/canonical names", code, canonical_name)
	end	
	
	-- Check [[Template:langname-lite]]
	local frame = mw.getCurrentFrame()
	local m_languages = require("Module:languages")
	local content = mw.title.new("Template:langname-lite"):getContent()
	content = content:gsub("%<%!%-%-.-%-%-%>", "") -- remove comments
	
	-- Each matched row is "|code1|code2=name"; `code_list` may hold several
	-- codes separated by "|".
	for code_list, name in mw.ustring.gmatch(content, "\n\t*|#*([^\n]+)=([^\n]*)") do
		if code_list:len() > 1 and code_list ~= "default" then
			for _, code in pairs(mw.text.split(code_list, "|")) do
				local lang = m_languages.getByCode(code, true, true, true)
				-- Work on a per-code copy so that every code sharing the row
				-- is compared against the row's original text.
				local seen_name = name
				if name:match("etymcode") then
					local parent_lang = m_languages.getNonEtymological(lang)
					local parent_name = frame:preprocess(name)
					local parent_real_name = parent_lang:getCanonicalName()
					if parent_name ~= parent_real_name then
						discrepancy("Template:langname-lite",
							"Code: <code>%s</code>. Saw name: %s. Expected name: %s.",
							code, tostring(parent_name), tostring(parent_real_name))
					end
					-- Extra parentheses drop the replacement count returned by gsub.
					seen_name = frame:preprocess((name:gsub("{{{allow etym|}}}", "1")))
				elseif name:match("familycode") then
					seen_name = name:match("familycode|(.-)|")
				end
				local real_name = lang:getCanonicalName()
				if seen_name ~= real_name then
					discrepancy("Template:langname-lite",
						"Code: <code>%s</code>. Saw name: %s. Expected name: %s.",
						code, tostring(seen_name), tostring(real_name))
				end
			end
		end
	end
	
	return "OK"
end

-- Checks every entry of the etymology language data module: its keys, code
-- uniqueness, canonical name, other names, parent, ancestors and Wikidata
-- item, and then looks for cycles in the chain of parents.
local function check_etym_languages()
	local modname = "etymology languages/data"
	
	local check_etymology_language_data_keys = check_data_keys{
		"canonicalName", "otherNames", "aliases", "varieties", "parent",
		"wikipedia_article", "wikidata_item", "ancestors", "ancestral_to_parent"
	}
	
	-- Local replacement for the file-level link(): plain text, no wikilink.
	local function link(name)
		if not name then
			return "???"
		end
		if name:find("[Ll]anguage$") then
			return name
		end
		return name .. " language"
	end
	
	-- Reports a missing, non-string or unknown parent and records the parent
	-- in nonempty_families.
	local function check_parent(code, canonical_name, parent)
		if not parent then
			discrepancy(modname,
				"Etymology-only %s (<code>%s</code>) has no parent language or family code.",
				link(canonical_name), code)
			return
		end
		
		if type(parent) ~= "string" then
			discrepancy(modname,
				"Etymology-only %s (<code>%s</code>) has a parent language or family code that is %s rather than a string.",
				link(canonical_name), code, "a " .. type(parent))
		elseif not (m_language_data[parent] or m_family_data[parent] or m_etym_language_data[parent]) then
			discrepancy(modname,
				"Etymology-only %s (<code>%s</code>) has invalid parent language or family code <code>%s</code>.",
				link(canonical_name), code, parent)
		end
		
		nonempty_families[parent] = true
	end
	
	for code, data in pairs(m_etym_language_data) do
		local canonical_name = data.canonicalName
		check_etymology_language_data_keys(modname, code, data)
		
		if not all_codes[code] then
			all_codes[code] = modname
		else
			discrepancy(modname, "Code <code>%s</code> is not unique; it is also defined in [[Module:%s]].", code, all_codes[code])
		end
		
		if not canonical_name then
			discrepancy(modname, "Code <code>%s</code> has no canonical name specified.", code)
		elseif not language_names[canonical_name] then
			-- A name that is already taken is left alone; no report is made for it.
			language_names[canonical_name] = code
		end
		
		check_other_names_aliases_varieties(modname, code, canonical_name, data)
		
		check_parent(code, canonical_name, data.parent)
		
		if data.ancestors then
			check_ancestors(modname, code, data, data.ancestors, true)
		end
		
		check_wikidata_item(modname, code, data, "wikidata_item")
	end

	-- Walk each chain of parents. `checked` holds data tables whose chain has
	-- already been walked; `on_path` holds those met during the current walk.
	local checked = {}
	for start_code, start_data in pairs(m_etym_language_data) do
		local on_path = {}
		local cur_code, cur_data = start_code, start_data

		while cur_data and not checked[cur_data] do
			if on_path[cur_data] then
				discrepancy(modname, "%s (<code>%s</code>) has a cyclic parental relationship to %s (<code>%s</code>)",
					link(cur_data[1] or cur_data.canonicalName), cur_code,
					link(m_etym_language_data[cur_data.parent].canonicalName), cur_data.parent
				)
				break
			end
			on_path[cur_data] = true
			
			local parent_code = cur_data.parent
			cur_code = parent_code
			cur_data = parent_code and m_etym_language_data[parent_code]
		end
		
		for visited in pairs(on_path) do
			checked[visited] = true
		end
	end
end

-- Checks every entry of the family data module: its keys, code uniqueness,
-- canonical name, other names, parent family and Wikidata item. Then
-- reports families without children and cycles in the chain of parent
-- families. Must run after the language checks, which fill nonempty_families.
local function check_families()
	local modname = "families/data"
	
	local check_family_data_keys = check_data_keys{
		"canonicalName", "otherNames", "aliases", "varieties", "family",
		"protoLanguage", "wikidata_item"
	}

	-- Local replacement for the file-level link(): links to the family's
	-- "... languages" category.
	local function link(name)
		if not name then
			return "???"
		elseif name:find("[Ll]anguages$") then
			return "[[:Category:" .. name .. "|" .. name .. " family]]"
		else
			return "[[:Category:" .. name .. " languages|" .. name .. " family]]"
		end
	end
	
	for code, data in pairs(m_family_data) do
		check_family_data_keys(modname, code, data)
		
		local canonical_name, family = data.canonicalName, data.family
		
		if all_codes[code] then
			discrepancy(modname, "Code <code>%s</code> is not unique; it is also defined in [[Module:%s]].", code, all_codes[code])
		else
			all_codes[code] = modname
		end
		
		if not canonical_name then
			discrepancy(modname, "<code>%s</code> has no canonical name specified.", code)
		elseif family_names[canonical_name] then
			discrepancy(modname,
				"%s (<code>%s</code>) has a canonical name that is not unique; it is also used by the code <code>%s</code>.",
				link(canonical_name), code, family_names[canonical_name])
		else
			family_names[canonical_name] = code
		end
		
		check_other_names_aliases_varieties(modname, code, canonical_name, data)
		
		if family then
			if family == code and code ~= "qfa-not" then
				discrepancy(modname,
					"%s (<code>%s</code>) has itself as its family.",
					link(canonical_name), code)
			elseif not m_family_data[family] then
				discrepancy(modname,
					"%s (<code>%s</code>) has an invalid parent family code %s.",
					link(canonical_name), code, show_family_code(family))
			end
			
			nonempty_families[family] = true
		end
		
		check_wikidata_item(modname, code, data, "wikidata_item")
	end
	
	for code, data in pairs(m_family_data) do
		if not (nonempty_families[code] or allowed_empty_families[code]) then
			discrepancy(modname, "%s (<code>%s</code>) has no child families or languages.", link(data.canonicalName), code)
		end
	end

	-- Walk each chain of parent families. `checked` holds codes whose chain
	-- has already been walked; `stack` holds those met during the current walk.
	-- 'qfa-not' is pre-marked because it is allowed to be its own family.
	local checked = { ['qfa-not'] = true }
	for start_code, start_data in pairs(m_family_data) do
		local stack = {}
		local code, data = start_code, start_data

		while data do
			if checked[code] then
				break
			end
			if stack[code] then
				discrepancy(modname, "%s (<code>%s</code>) has a cyclic parental relationship to %s (<code>%s</code>)",
					link(data.canonicalName), code,
					link(m_family_data[data.family].canonicalName), data.family
				)
				break
			end
			stack[code] = true
			-- Family data stores the parent under the `family` key.
			local parent_code = data.family
			code, data = parent_code, parent_code and m_family_data[parent_code]
		end
		
		for visited_code in pairs(stack) do
			checked[visited_code] = true
		end
	end
end

-- Checks every entry of the script data module: presence in the lookup
-- modules (four-character codes only), its keys, canonical name, other
-- names, whether any language uses it, and its character pattern.
-- Must run after the language checks, which fill nonempty_scripts.
local function check_scripts()
	local modname = "scripts/data"
	
	local valid_script_keys = {
		"canonicalName", "otherNames", "aliases", "varieties", "parent",
		"systems", "wikipedia_article", "characters", "capitalized", "translit", "direction",
		"character_category", "normalizationFixes"
	}
	local check_script_data_keys = check_data_keys(valid_script_keys, true)
	
	local m_script_codes = require('Module:scripts/code to canonical name')
	local m_script_canonical_names = require('Module:scripts/by name')
	
	for code, data in pairs(m_script_data) do
		local canonical_name = data.canonicalName
		local characters = data.characters
		
		if not m_script_codes[code] and #code == 4 then
			discrepancy('scripts/code to canonical name', '<code>%s</code> (%s) is missing', code, canonical_name)
		end
		
		check_script_data_keys(modname, code, data)
		
		if not canonical_name then
			discrepancy(modname, "Code <code>%s</code> has no canonical name specified.", code)
		elseif not script_names[canonical_name] then
			-- A name that is already taken is left alone; no report is made for it.
			if not m_script_canonical_names[canonical_name] and #code == 4 then
				discrepancy('scripts/by name', '%s (<code>%s</code>) is missing', canonical_name, code)
			end
			script_names[canonical_name] = code
		end
		
		check_other_names_aliases_varieties(modname, code, canonical_name, data)
		
		-- A script that is in use but has no characters listed is not reported.
		if not nonempty_scripts[code] then
			local suffix = characters and ""
				or " and has no characters listed for auto-detection"
			discrepancy(modname,
				"%s (<code>%s</code>) is not used by any language%s.",
				link_script(canonical_name), code, suffix)
		end

		if characters then
			validate_pattern(characters, modname, code, data, false)
		end
	end
end

-- Cross-checks the Chinese simplified->traditional table (Module:zh/data/st)
-- against the traditional->simplified table (Module:zh/data/ts) and reports
-- pairs that do not map back to each other.
local function check_zh_trad_simp()
	local m_ts = mw.loadData("Module:zh/data/ts")
	local m_st = mw.loadData("Module:zh/data/st")
	
	-- Follows the mappings from one table to the other, appending each
	-- character met to `chars`, until a character is missing from a table
	-- (issue 1) or a pair that maps to each other is reached (issue 2).
	-- `t` is true when the next lookup starts from the traditional character.
	-- Returns chars, issue.
	local function find_stable_loop(chars, simp, trad, t)
		local function check(char, char_str, other, other_str, char_table)
			if not char then error(table.concat(chars)) end
			table.insert(chars, '{{ruby|[' .. char .. '](' .. char_str .. ')}}')
			if not other then
				table.insert(chars, 'not found')
				return chars, 1
			elseif char_table[other] ~= char then
				return find_stable_loop(chars, other, char_table[other], not t)
			else
				table.insert(chars, '{{ruby|[' .. other .. '](' .. other_str .. ')}} (')
				table.insert(chars, '{{ruby|[' .. char_table[other] .. '](' .. char_str .. ')}} etc.)')
				return chars, 2
			end
		end
		
		-- Returning the results directly keeps `issue` from being written to
		-- a global variable.
		if t then
			return check(trad, "Trad.", simp, "Simp.", m_st.st)
		end
		return check(simp, "Simp.", trad, "Trad.", m_ts.ts)
	end
	
	local frame = mw.getCurrentFrame()
	
	-- Renders the chain of characters as wikitext joined by arrows.
	local function render(chars)
		return frame:preprocess('{{lang|zh|sc=Hani|' .. table.concat(chars, '}} → {{lang|zh|sc=Hani|') .. '}}')
	end
	
	for simp, trad in pairs(m_st.st) do
		if m_ts.ts[trad] ~= simp then
			local chars, issue = {}
			table.insert(chars, '{{ruby|[' .. simp .. '](Simp.)}}')
			chars, issue = find_stable_loop(chars, m_ts.ts[trad], trad, true)
			if issue == 1 then
				local mod
				if chars[#chars-1]:match("Trad.") then mod = "ts" else mod = "st" end
				-- The rendered text is passed as an argument so that it is
				-- never interpreted as a format string.
				discrepancy('zh/data/' .. mod, 'missing character: %s', render(chars))
			elseif issue == 2 then
				discrepancy('zh/data/st', 'possible mismatched character: %s', render(chars))
			end
		end
	end
	for trad, simp in pairs(m_ts.ts) do
		if m_st.st[simp] ~= trad then
			local chars, issue = {}
			table.insert(chars, '{{ruby|[' .. trad .. '](Trad.)}}')
			chars, issue = find_stable_loop(chars, simp, m_st.st[simp], false)
			if issue == 1 then
				local mod
				if chars[#chars-1]:match("Simp.") then mod = "st" else mod = "ts" end
				discrepancy('zh/data/' .. mod, 'missing character: %s', render(chars))
			elseif issue == 2 then
				discrepancy('zh/data/ts', 'possible mismatched character: %s', render(chars))
			end
		end
	end
end

-- Runs every check and returns a table mapping module names to sorted
-- Arrays of discrepancy messages.
-- Warning: cannot be called twice in the same module invocation because
-- some module-global variables are not reset between calls.
function export.do_checks()
	-- Any module name that is looked up gets a fresh, empty message list.
	local auto_list = {
		__index = function (self, key)
			local list = Array()
			self[key] = list
			return list
		end
	}
	messages = setmetatable({}, auto_list)
	
	check_languages()
	check_etym_languages()

	-- families and scripts must be checked AFTER languages; languages checks fill out
	-- the nonempty_families and nonempty_scripts tables, used for testing if a family/script
	-- is ever used in the data
	check_families()
	check_scripts()
	check_zh_trad_simp()
	
	setmetatable(messages, nil)
	
	-- First code mentioned in a message, memoized per message.
	local find_code = require("Module:fun").memoize(function(message)
		return string.match(message, "<code>([^<]+)</code>")
	end)
	
	-- Orders messages by their first code when both have one, and by their
	-- full text otherwise.
	local function by_code_then_text(first, second)
		local first_code, second_code = find_code(first), find_code(second)
		if not (first_code and second_code) then
			return first < second
		end
		return first_code < second_code
	end
	
	for _, msglist in pairs(messages) do
		msglist:sort(by_code_then_text)
	end
	
	local collected = messages
	messages = nil
	return collected
end

-- Formats the messages of one module as a level-3 heading linking to the
-- module, followed by one bullet per message. Names that do not start
-- with "Module:" or "Template:" are given the "Module:" prefix.
function export.format_message(modname, msglist)
	local target = modname
	if not (modname:match("^Module:") or modname:match("^Template:")) then
		target = "Module:" .. modname
	end
	
	local bullets = msglist
		:map(function(msg)
			return "\n* " .. msg
		end)
		:concat()
	
	return "===[[" .. target .. "]]===" .. bullets
end

-- Runs all checks and returns the formatted messages of the modules named
-- in the arguments, separated by newlines. Modules without messages are
-- left out.
function export.check_modules(...)
	local sections = Array()
	local all_messages = export.do_checks()
	
	for _, requested in ipairs({...}) do
		local msglist = all_messages[requested]
		if msglist then
			local section = export.format_message(requested, msglist)
			sections:insert(section)
		end
	end
	
	return sections:concat("\n")
end

-- Template entry point for check_modules: the frame's arguments are copied
-- into a plain table and passed on as the module names.
function export.check_modules_t(frame)
	local module_names = m_table.shallowcopy(frame.args)
	return export.check_modules(unpack(module_names))
end

-- Runs all checks and returns the full report as wikitext: a success
-- message when no module has any message list, otherwise a warning line
-- followed by one section per module.
function export.perform(frame)
	local messages = export.do_checks()
	
	-- Format the messages
	local ret = Array()
	for modname, msglist in m_table.sortedPairs(messages) do
		ret:insert(export.format_message(modname, msglist))
	end
	
	-- Are there any messages? Nothing was formatted if the first slot is empty.
	if ret[1] == nil then
		return '<b class="success">Glory to Arstotzka.</b>'
	end
	
	ret:insert(1, '<b class="warning">Discrepancies detected:</b>')
	return ret:concat('\n')
end

return export