API References

Yunir.alignMethod
align(src::Array{String}, tgt::Array{String}; 
	costmodel::CostModel=CostModel(match=0, mismatch=1, insertion=1, deletion=0),
	store_results::Bool=true
)

ALign tgt array of texts to src array of texts using a particular costmodel from BioAlignments.jl. store_results if results of alignment are stored or returned, otherwise, only the scores are returned.

source
Yunir.alignMethod
align(src::String, tgt::String; costmodel::BioAlignments.CostModel=BioAlignments.CostModel(match=0, mismatch=1, insertion=1, deletion=1))

Align tgt string to src string using a particular costmodel from BioAlignments.jl.

source
Yunir.arabicMethod
arabic(s::String[, encoder::AbstractEncoder])

Encode the String object into Arabic characters. Custom encoder generated from @transliterator can be provided, but default is Buckwalter.

Examples

julia> bw_basmala = "bisomi {ll~ahi {lr~aHoma`ni {lr~aHiymi"
julia> arabic(bw_basmala)
"بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيمِ"
source
Yunir.cleanFunction
clean(s::String, replace_non_ar::String, target_regex::Regex)

Clean the input text by replacing all non-Arabic texts with a string input.

source
Yunir.dediacMethod
dediac(s::String)

Dediacritize the input String object.

Examples

julia> bw_basmala = "bisomi {ll~ahi {lr~aHoma`ni {lr~aHiymi"
julia> dediac(bw_basmala)
"bsm {llh {lrHmn {lrHym"
julia> dediac(arabic(bw_basmala))
"بسم ٱلله ٱلرحمن ٱلرحيم"
source
Yunir.encodeMethod
encode(s::String)

Transliterate the input String object using Buckwalter.

Examples

julia> ar_basmala = "بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيمِ"
julia> encode(ar_basmala)
"bisomi {ll~ahi {lr~aHoma`ni {lr~aHiymi"
source
Yunir.encodeMethod
encode(s::Union{Char,String}, encoder::AbstractEncoder)

Transliterate the input String object using a custom encoder. Custom encoder is generated using the @transliterator.

source
Yunir.isfeatMethod
isfeat(x::Orthography, y::Type{<:AbstractConsonant})

checks if x is a y feature.

julia> ar_basmala = "بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيمِ";
julia> arb_token = tokenize(ar_basmala)
4-element Vector{String}:
 "بِسْمِ"
 "ٱللَّهِ"
 "ٱلرَّحْمَٰنِ"
 "ٱلرَّحِيمِ"
julia> arb_parsed2 = parse.(Orthography, arb_token)
4-element Vector{Orthography}:
 Orthography(Type[Ba, Kasra, Seen, Sukun, Meem, Kasra])
 Orthography(Type[AlifHamzatWasl, Lam, Lam, Shadda, Fatha, Ha, Kasra])
 Orthography(Type[AlifHamzatWasl, Lam, Ra, Shadda, Fatha, HHa, Sukun, Meem, Fatha, AlifKhanjareeya, Noon, Kasra])
 Orthography(Type[AlifHamzatWasl, Lam, Ra, Shadda, Fatha, HHa, Kasra, Ya, Meem, Kasra])
julia> isfeat(arb_parsed2[1], AbstractLunar)
6-element BitVector:
 1
 0
 0
 0
 1
 0
source
Yunir.normalizeFunction
normalize(s::String)

Normalize a Arabic or Buckwalter String characters.

Examples

julia> normalize("بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيمِ")
"بِسْمِ اللَّهِ الرَّحْمَانِ الرَّحِيمِ"
source
Yunir.normalizeMethod
normalize(s::String, char::Symbol)

Normalize a specific Arabic or Buckwalter String character (chars).

Examples

julia> ar_basmala = "بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيمِ"
julia> normalize(ar_basmala, :alif_khanjareeya) === "بِسْمِ ٱللَّهِ ٱلرَّحْمَانِ ٱلرَّحِيمِ"
source
Yunir.normalizeMethod
normalize(s::String, chars::Array{Symbol,1})

Normalize a specific Arabic or Buckwalter String character/s (chars).

Examples

julia> ar_basmala = "بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيمِ"
julia> normalize(ar_basmala, [:alif_khanjareeya, :hamzat_wasl]) === "بِسْمِ اللَّهِ الرَّحْمَانِ الرَّحِيمِ"
source
Yunir.tokenizeFunction
tokenize(s::String)

tokenizes the string input s by space, and also tokenizes the punctuations.

julia> ar_basmala = "بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيمِ"
julia> tokenize(ar_basmala)
4-element Vector{String}:
 "بِسْمِ"
 "ٱللَّهِ"
 "ٱلرَّحْمَٰنِ"
 "ٱلرَّحِيمِ"
source
Yunir.@transliteratorMacro
@transliterator(dict, name)

Create a custom transliterator using an input dict (Dict object) with its corresponding name as String object. This will automatically update the transliterator inside all functions like arabic, verses, and encode.

Examples

julia> my_encoder = Dict(
    Symbol(Char(0x0621)) => Symbol('('),
    Symbol(Char(0x0622)) => Symbol('''),
    Symbol(Char(0x0623)) => Symbol('&'),
    Symbol(Char(0x0624)) => Symbol('>'),
    Symbol(Char(0x0625)) => Symbol('}'),
    Symbol(Char(0x0626)) => Symbol('<'),
    Symbol(Char(0x0627)) => Symbol('b'),
    Symbol(Char(0x0628)) => Symbol('A'),
    Symbol(Char(0x0629)) => Symbol('t'),
    Symbol(Char(0x062A)) => Symbol('p'),
    Symbol(Char(0x062B)) => Symbol('j'),
    Symbol(Char(0x062C)) => Symbol('v'),
    Symbol(Char(0x062D)) => Symbol('x'),
    Symbol(Char(0x062E)) => Symbol('H'),
    Symbol(Char(0x062F)) => Symbol('*'),
    Symbol(Char(0x0630)) => Symbol('d'),
    Symbol(Char(0x0631)) => Symbol('z'),
    Symbol(Char(0x0632)) => Symbol('r'),
    Symbol(Char(0x0633)) => Symbol('$'),
    Symbol(Char(0x0634)) => Symbol('s'),
    Symbol(Char(0x0635)) => Symbol('D'),
    Symbol(Char(0x0636)) => Symbol('S'),
    Symbol(Char(0x0637)) => Symbol('Z'),
    Symbol(Char(0x0638)) => Symbol('T'),
    Symbol(Char(0x0639)) => Symbol('g'),
    Symbol(Char(0x063A)) => Symbol('E'),
    Symbol(Char(0x0640)) => Symbol('f'),
    Symbol(Char(0x0641)) => Symbol('_'),
    Symbol(Char(0x0642)) => Symbol('k'),
    Symbol(Char(0x0643)) => Symbol('q'),
    Symbol(Char(0x0644)) => Symbol('m'),
    Symbol(Char(0x0645)) => Symbol('l'),
    Symbol(Char(0x0646)) => Symbol('h'),
    Symbol(Char(0x0647)) => Symbol('n'),
    Symbol(Char(0x0648)) => Symbol('Y'),
    Symbol(Char(0x0649)) => Symbol('w'),
    Symbol(Char(0x064A)) => Symbol('F'),
    Symbol(Char(0x064B)) => Symbol('y'),
    Symbol(Char(0x064C)) => Symbol('K'),
    Symbol(Char(0x064D)) => Symbol('N'),
    Symbol(Char(0x064E)) => Symbol('u'),
    Symbol(Char(0x064F)) => Symbol('a'),
    Symbol(Char(0x0650)) => Symbol('~'),
    Symbol(Char(0x0651)) => Symbol('i'),
    Symbol(Char(0x0652)) => Symbol('^'),
    Symbol(Char(0x0653)) => Symbol('o'),
    Symbol(Char(0x0654)) => Symbol('`'),
    Symbol(Char(0x0670)) => Symbol('#'),
    Symbol(Char(0x0671)) => Symbol(':'),
    Symbol(Char(0x06DC)) => Symbol('{'),
    Symbol(Char(0x06DF)) => Symbol('"'),
    Symbol(Char(0x06E0)) => Symbol('@'),
    Symbol(Char(0x06E2)) => Symbol(';'),
    Symbol(Char(0x06E3)) => Symbol('['),
    Symbol(Char(0x06E5)) => Symbol('.'),
    Symbol(Char(0x06E6)) => Symbol(','),
    Symbol(Char(0x06E8)) => Symbol('-'),
    Symbol(Char(0x06EA)) => Symbol('!'),
    Symbol(Char(0x06EB)) => Symbol('%'),
    Symbol(Char(0x06EC)) => Symbol('+'),
    Symbol(Char(0x06ED)) => Symbol(']')
);
julia> @transliterator my_encoder "MyEncoder"
julia> encode(ar_basmala)
"A~$^l~ :mmiun~ :mziux^lu#h~ :mziux~Fl~"
source
Yunir.@transliteratorMacro
@transliterator(symbl)

Fallback to the default Buckwalter transliterator.

julia> @transliterator :default
julia> ar_basmala = "بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيمِ"
julia> encode(ar_basmala)
"bisomi {ll~ahi {lr~aHoma`ni {lr~aHiymi"
source