-- Module:scripts
-- Appearance
-- Documentation for this module may be created at Module:scripts/doc
-- Table of functions this module makes available to callers; it is returned at the end of the file.
local export = {}
-- Prototype table holding the methods of script objects; it is installed as their metatable by export.makeObject.
local Script = {}
-- Returns the script code stored on this object (the `_code` field set by export.makeObject).
function Script:getCode()
	local code = self._code
	return code
end
-- Returns the `canonicalName` field of the script's raw data.
function Script:getCanonicalName()
	local rawData = self._rawData
	return rawData.canonicalName
end
-- Returns the display form of the script, which is its category name with
-- the "nocap" flag set (i.e. without forcing an initial capital letter).
function Script:getDisplayForm()
	local displayForm = self:getCategoryName("nocap")
	return displayForm
end
-- Returns whatever [[Module:language-like]]'s getOtherNames computes for this
-- object. `onlyOtherNames` is forwarded to that function unchanged.
function Script:getOtherNames(onlyOtherNames)
	local m_languageLike = require("Module:language-like")
	return m_languageLike.getOtherNames(self, onlyOtherNames)
end
-- Returns the `aliases` field of the raw data, or a fresh empty table when
-- the script has none.
function Script:getAliases()
	local aliases = self._rawData.aliases
	if aliases then
		return aliases
	end
	return {}
end
-- Returns whatever [[Module:language-like]]'s getVarieties computes for this
-- object. `flatten` is forwarded to that function unchanged.
function Script:getVarieties(flatten)
	local m_languageLike = require("Module:language-like")
	return m_languageLike.getVarieties(self, flatten)
end
-- Returns the `parent` field of the script's raw data (nil when absent).
-- The raw value is returned as-is; it is not converted to a script object here.
function Script:getParent()
	local rawData = self._rawData
	return rawData.parent
end
-- Returns the list of writing-system objects for this script.
-- The list is built on first use by looking up each code in the raw data's
-- `systems` field via [[Module:writing systems]], and is then cached on the
-- object in `_systemObjects` so later calls return the same table.
function Script:getSystems()
	local cached = self._systemObjects
	if cached then
		return cached
	end

	local m_systems = require("Module:writing systems")
	local systems = {}
	-- Store the cache before filling it, as the list is populated in place.
	self._systemObjects = systems
	for _, systemCode in ipairs(self._rawData.systems or {}) do
		table.insert(systems, m_systems.getByCode(systemCode))
	end
	return systems
end
--function Script:getAllNames()
-- return self._rawData.names
--end
-- Returns the constant string identifying the kind of object this is.
function Script:getType()
	local objectType = "script"
	return objectType
end
-- Returns the category name for the script: the canonical name followed by
-- " script", unless the canonical name already ends in "code" or "semaphore"
-- (either capitalization of the first letter). No names contain "script".
-- Unless `nocap` is truthy, the first letter is upper-cased using the wiki's
-- content language.
function Script:getCategoryName(nocap)
	local categoryName = self._rawData.canonicalName
	if not (categoryName:find("[Cc]ode$") or categoryName:find("[Ss]emaphore$")) then
		categoryName = categoryName .. " script"
	end
	if nocap then
		return categoryName
	end
	local capitalized = mw.getContentLanguage():ucfirst(categoryName)
	return capitalized
end
-- Builds a wikilink to the script's category page: the link target is the
-- category name and the visible label is the display form.
function Script:makeCategoryLink()
	local target = self:getCategoryName()
	local label = self:getDisplayForm()
	return "[[:Category:" .. target .. "|" .. label .. "]]"
end
-- Returns the `wikipedia_article` field of the raw data when it is set;
-- otherwise falls back to the (capitalized) category name.
function Script:getWikipediaArticle()
	local article = self._rawData.wikipedia_article
	if article then
		return article
	end
	return self:getCategoryName()
end
-- Returns the `characters` field of the raw data, or nil when it is absent
-- or false. countCharacters uses the same value as the body of a character class.
function Script:getCharacters()
	local characters = self._rawData.characters
	if not characters then
		return nil
	end
	return characters
end
-- Counts how many characters of `text` belong to this script.
--   * Scripts without a `characters` entry in their raw data always give 0.
--   * For "Hant" and "Hans" the result is only 0 or 1: 1 as soon as any
--     character of `text` is a key of the `ts` table of Module:zh/data/ts
--     (Hant) or of the `st` table of Module:zh/data/st (Hans), else 0.
--   * For every other script, the result is the number of matches of the
--     character class built from the `characters` string.
function Script:countCharacters(text)
	local characters = self._rawData.characters
	if not characters then
		return 0
	end

	-- Due to the number of Chinese characters, a different determination method is used when differentiating between traditional ("Hant") and simplified ("Hans") Chinese.
	local code = self._code
	if code == "Hant" or code == "Hans" then
		local toChar = mw.ustring.char
		local lookup
		if code == "Hant" then
			lookup = mw.loadData("Module:zh/data/ts").ts
		else
			lookup = mw.loadData("Module:zh/data/st").st
		end
		for cp in mw.ustring.gcodepoint(text) do
			if lookup[toChar(cp)] then
				return 1
			end
		end
		return 0
	end

	-- gsub's second return value is the number of substitutions made.
	local _, matches = mw.ustring.gsub(text, "[" .. characters .. "]", "")
	return matches
end
-- Returns true if the raw data's `capitalized` field is truthy, false
-- otherwise (always a real boolean).
function Script:hasCapitalization()
	if self._rawData.capitalized then
		return true
	end
	return false
end
-- Returns the raw data's `translit` field when it is an explicit boolean;
-- any other value (including nil) is treated as true.
function Script:isTransliterated()
	local translit = self._rawData.translit
	if type(translit) ~= "boolean" then
		return true
	end
	return translit
end
-- Returns the `direction` field of the script's raw data (nil when absent).
function Script:getDirection()
	local rawData = self._rawData
	return rawData.direction
end
-- Returns the raw data table this object wraps (the table itself, not a copy).
function Script:getRawData()
	local rawData = self._rawData
	return rawData
end
-- Returns true if the script requires fixes to Unicode normalization under
-- certain circumstances, i.e. if its raw data has a truthy
-- `normalizationFixes` field; returns false otherwise.
function Script:hasNormalizationFixes()
	if self._rawData.normalizationFixes then
		return true
	end
	return false
end
-- Corrects discouraged sequences of Unicode characters to the encouraged
-- equivalents. Each entry of `normalizationFixes.from` is replaced, in order,
-- by the entry at the same index of `normalizationFixes.to` (or by the empty
-- string when there is none). Returns `text` unchanged when the script has
-- no such data.
function Script:fixDiscouragedSequences(text)
	if not self:hasNormalizationFixes() then
		return text
	end
	local fromList = self._rawData.normalizationFixes.from
	if not fromList then
		return text
	end

	for index, from in ipairs(fromList) do
		-- Most of the time, it's faster to check whether mw.ustring.gsub is
		-- actually necessary before using it, as it's a lot slower.
		local replace = from:match("[%%.[%]*+%-?]") and mw.ustring.gsub or string.gsub
		text = replace(text, from, self._rawData.normalizationFixes.to[index] or "")
	end
	return text
end
-- Implements a modified form of Unicode normalization, which fixes issues
-- caused by MediaWiki, and for instances where there are identified
-- deficiencies in the default Unicode combining classes.
--
-- Parameters:
--   text: the string to fix (callers in this file pass already-normalized text).
--   self: the script object whose `normalizationFixes.combiningClasses` data,
--         if any, is applied.
-- Returns the corrected string.
local function fixNormalization(text, self)
	-- Convert special pagename characters generated by {{PAGENAME}} etc. back
	-- to their normal forms. The numeric character references must appear
	-- literally in these patterns; the order (34, 38, 39) is significant
	-- because decoding "&#38;" can itself produce a new "&#39;" sequence.
	if text:match("&#") then
		text = text:gsub("&#34;", "\""):gsub("&#38;", "&"):gsub("&#39;", "'")
	end

	-- Fixes to Unicode combining class.
	if self:hasNormalizationFixes() and self._rawData.normalizationFixes.combiningClasses then
		local combiningClassFixes = self._rawData.normalizationFixes.combiningClasses
		local charsToFix = table.concat(require("Module:table").keysToList(combiningClassFixes))
		-- Only do the expensive work if the text contains a character that needs fixing.
		if mw.ustring.match(text, "[" .. charsToFix .. "]") then
			local gsub, codepoint, u = mw.ustring.gsub, mw.ustring.codepoint, mw.ustring.char
			-- Obtain the list of default combining classes.
			local combiningClasses = mw.loadData("Module:scripts/data/combiningClasses")
			-- For each character that needs fixing, find all characters with
			-- combining classes equal to or lower than its default class, but
			-- greater than its new class (i.e. intermediary characters).
			for charToFix, newCombiningClass in pairs(combiningClassFixes) do
				-- The default class of charToFix does not change inside the inner loop.
				local defaultCombiningClass = combiningClasses[codepoint(charToFix)]
				local intermediaryChars = {}
				for character, combiningClass in pairs(combiningClasses) do
					if newCombiningClass < combiningClass and combiningClass <= defaultCombiningClass then
						table.insert(intermediaryChars, u(character))
					end
				end
				-- Swap the character with any intermediary characters that are
				-- immediately before it. With no intermediary characters there
				-- is nothing to swap, and an empty "[]" class would be a
				-- malformed pattern, so skip the substitution.
				if #intermediaryChars > 0 then
					text = gsub(text, "([" .. table.concat(intermediaryChars) .. "]+)(" .. charToFix .. ")", "%2%1")
				end
			end
		end
	end
	return text
end
-- Normalizes `text` to NFC with mw.ustring.toNFC, then applies this script's
-- normalization fixes via fixNormalization.
function Script:toFixedNFC(text)
	local normalized = mw.ustring.toNFC(text)
	return fixNormalization(normalized, self)
end
-- Normalizes `text` to NFD with mw.ustring.toNFD, then applies this script's
-- normalization fixes via fixNormalization.
function Script:toFixedNFD(text)
	local normalized = mw.ustring.toNFD(text)
	return fixNormalization(normalized, self)
end
-- Normalizes `text` to NFKC with mw.ustring.toNFKC, then applies this
-- script's normalization fixes via fixNormalization.
function Script:toFixedNFKC(text)
	local normalized = mw.ustring.toNFKC(text)
	return fixNormalization(normalized, self)
end
-- Normalizes `text` to NFKD with mw.ustring.toNFKD, then applies this
-- script's normalization fixes via fixNormalization.
function Script:toFixedNFKD(text)
	local normalized = mw.ustring.toNFKD(text)
	return fixNormalization(normalized, self)
end
-- Serializes the script's main properties with [[Module:JSON]].
-- The category name is given in its uncapitalized ("nocap") form, `systems`
-- holds the raw system codes (an empty table when there are none), and
-- `wikipediaArticle` is the raw field without the category-name fallback
-- that getWikipediaArticle applies.
function Script:toJSON()
	local properties = {
		canonicalName = self:getCanonicalName(),
		categoryName = self:getCategoryName("nocap"),
		code = self._code,
		otherNames = self:getOtherNames(true),
		aliases = self:getAliases(),
		varieties = self:getVarieties(),
		type = self:getType(),
		direction = self:getDirection(),
		characters = self:getCharacters(),
		parent = self:getParent(),
		systems = self._rawData.systems or {},
		wikipediaArticle = self._rawData.wikipedia_article,
	}
	local m_json = require("Module:JSON")
	return m_json.toJSON(properties)
end
-- Field lookups that miss on a script object fall back to the Script table,
-- which is how the methods above become available on every object.
Script.__index = Script

-- Wraps a raw data table in a script object.
--   code: the script code, stored as `_code`.
--   data: the raw data table, stored as `_rawData`.
-- Returns the new object, or nil when `data` is nil or false.
function export.makeObject(code, data)
	if not data then
		return nil
	end
	local object = { _rawData = data, _code = code, _type = "script object" }
	return setmetatable(object, Script)
end
-- Looks up a script object by its code in [[Module:scripts/data]].
--   code: the script code to look up.
--   paramForError: when truthy and no script is found, [[Module:languages/error]]
--     is called with the code and this value (presumably raising an error
--     naming the offending parameter; confirm in that module).
--   disallowNil: unless this is truthy, a nil `code` returns nil immediately,
--     without any lookup or error.
-- Returns the script object, or nil when the code is unknown.
function export.getByCode(code, paramForError, disallowNil)
	if not disallowNil and code == nil then
		return nil
	end

	-- Record uses of the "IPAchar" code through the tracking module.
	if code == "IPAchar" then
		require("Module:debug/track")("IPAchar")
	end

	local scriptData = mw.loadData("Module:scripts/data")[code]
	local script = export.makeObject(code, scriptData)
	if paramForError and not script then
		require("Module:languages/error")(code, paramForError, "script code", nil, "not real lang")
	end
	return script
end
-- Looks up a script object by name: the name is mapped to a code through
-- [[Module:scripts/by name]], and the code is then resolved against
-- [[Module:scripts/data]]. Returns nil when the name has no code.
function export.getByCanonicalName(name)
	local code = mw.loadData("Module:scripts/by name")[name]
	if code then
		local scriptData = mw.loadData("Module:scripts/data")[code]
		return export.makeObject(code, scriptData)
	end
	return nil
end
--[=[
	Takes a codepoint or a character and finds the script code (if any) that
	is appropriate for it. The lookup itself is delegated to the charToScript
	function of [[Module:scripts/charToScript]].

	As documented for this function: the lookup is based on the codepoint and
	uses the data module [[Module:scripts/recognition data]], which was
	generated from the patterns in [[Module:scripts/data]] using
	[[Module:User:Erutuon/script recognition]]. A character is converted to a
	codepoint. A script code is returned if the codepoint is in the list of
	individual characters, or if it is in one of the defined ranges in the
	4096-character block that it belongs to; otherwise "None" is returned.
	NOTE(review): these details live in the delegate module and cannot be
	checked from this file; confirm them there.
]=]
function export.charToScript(char)
	local m_charToScript = require("Module:scripts/charToScript")
	return m_charToScript.charToScript(char)
end
-- Forwards `text` to the findBestScriptWithoutLang function of
-- [[Module:scripts/charToScript]] and returns its result unchanged.
function export.findBestScriptWithoutLang(text)
	local m_charToScript = require("Module:scripts/charToScript")
	return m_charToScript.findBestScriptWithoutLang(text)
end
-- Hand the table of exported functions to whoever requires this module.
return export