Working with QuranTree.jl

Yunir.jl can seamlessly work with QuranTree.jl.

julia> using Yunir
julia> using QuranTree
julia> data = QuranData();
julia> crps, tnzl = load(data);
julia> crpsdata = table(crps)
Quranic Arabic Corpus (morphology)
(C) 2011 Kais Dukes

128219×7 DataFrame
    Row │ chapter  verse  word   part   form        tag     features           ⋯
        │ Int64    Int64  Int64  Int64  String      String  String             ⋯
────────┼───────────────────────────────────────────────────────────────────────
      1 │       1      1      1      1  bi          P       PREFIX|bi+         ⋯
      2 │       1      1      1      2  somi        N       STEM|POS:N|LEM:{so
      3 │       1      1      2      1  {ll~ahi     PN      STEM|POS:PN|LEM:{l
      4 │       1      1      3      1  {l          DET     PREFIX|Al+
      5 │       1      1      3      2  r~aHoma`ni  ADJ     STEM|POS:ADJ|LEM:r ⋯
      6 │       1      1      4      1  {l          DET     PREFIX|Al+
      7 │       1      1      4      2  r~aHiymi    ADJ     STEM|POS:ADJ|LEM:r
      8 │       1      2      1      1  {lo         DET     PREFIX|Al+
   ⋮    │    ⋮       ⋮      ⋮      ⋮        ⋮         ⋮             ⋮           ⋱
 128213 │     114      5      5      2  n~aAsi      N       STEM|POS:N|LEM:n~a ⋯
 128214 │     114      6      1      1  mina        P       STEM|POS:P|LEM:min
 128215 │     114      6      2      1  {lo         DET     PREFIX|Al+
 128216 │     114      6      2      2  jin~api     N       STEM|POS:N|LEM:jin
 128217 │     114      6      3      1  wa          CONJ    PREFIX|w:CONJ+     ⋯
 128218 │     114      6      3      2  {l          DET     PREFIX|Al+
 128219 │     114      6      3      3  n~aAsi      N       STEM|POS:N|LEM:n~a
                                                1 column and 128204 rows omitted
julia> tnzldata = table(tnzl)
Tanzil Quran Text (Uthmani)
(C) 2008-2010 Tanzil.net

6236×3 DataFrame
  Row │ chapter  verse  form
      │ Int64    Int64  String
──────┼───────────────────────────────────────────────────
    1 │       1      1  بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيمِ
    2 │       1      2  ٱلْحَمْدُ لِلَّهِ رَبِّ ٱلْعَٰلَمِينَ
    3 │       1      3  ٱلرَّحْمَٰنِ ٱلرَّحِيمِ
    4 │       1      4  مَٰلِكِ يَوْمِ ٱلدِّينِ
    5 │       1      5  إِيَّاكَ نَعْبُدُ وَإِيَّاكَ نَسْتَعِينُ
    6 │       1      6  ٱهْدِنَا ٱلصِّرَٰطَ ٱلْمُسْتَقِيمَ
    7 │       1      7  صِرَٰطَ ٱلَّذِينَ أَنْعَمْتَ عَلَيْهِمْ غَيْرِ ٱلْمَغْضُو…
    8 │       2      1  بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيمِ الٓمٓ
  ⋮   │    ⋮       ⋮                    ⋮
 6230 │     113      5  وَمِن شَرِّ حَاسِدٍ إِذَا حَسَدَ
 6231 │     114      1  بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيمِ قُلْ أَعُوذُ بِ…
 6232 │     114      2  مَلِكِ ٱلنَّاسِ
 6233 │     114      3  إِلَٰهِ ٱلنَّاسِ
 6234 │     114      4  مِن شَرِّ ٱلْوَسْوَاسِ ٱلْخَنَّاسِ
 6235 │     114      5  ٱلَّذِى يُوَسْوِسُ فِى صُدُورِ ٱلنَّاسِ
 6236 │     114      6  مِنَ ٱلْجِنَّةِ وَٱلنَّاسِ
                                         6221 rows omitted
julia> arabic(verses(crpsdata[114])[1])
"قُلْ أَعُوذُ بِرَبِّ ٱلنَّاسِ"
Note

You need to install QuranTree.jl to run the example above. To install it, run:

using Pkg
Pkg.add("QuranTree")

Normalization

julia> ikhlas = crpsdata[114]
Chapter 114: ٱلنَّاس (People)

30×6 DataFrame
 Row │ verse  word   part   form        tag     features                       ⋯
     │ Int64  Int64  Int64  String      String  String                         ⋯
─────┼──────────────────────────────────────────────────────────────────────────
   1 │     1      1      1  qulo        V       STEM|POS:V|IMPV|LEM:qaAla|ROOT ⋯
   2 │     1      2      1  >aEuw*u     V       STEM|POS:V|IMPF|LEM:Eu*o|ROOT:
   3 │     1      3      1  bi          P       PREFIX|bi+
   4 │     1      3      2  rab~i       N       STEM|POS:N|LEM:rab~|ROOT:rbb|M
   5 │     1      4      1  {l          DET     PREFIX|Al+                     ⋯
   6 │     1      4      2  n~aAsi      N       STEM|POS:N|LEM:n~aAs|ROOT:nws|
   7 │     2      1      1  maliki      N       STEM|POS:N|LEM:malik|ROOT:mlk|
   8 │     2      2      1  {l          DET     PREFIX|Al+
  ⋮  │   ⋮      ⋮      ⋮        ⋮         ⋮                     ⋮              ⋱
  24 │     5      5      2  n~aAsi      N       STEM|POS:N|LEM:n~aAs|ROOT:nws| ⋯
  25 │     6      1      1  mina        P       STEM|POS:P|LEM:min
  26 │     6      2      1  {lo         DET     PREFIX|Al+
  27 │     6      2      2  jin~api     N       STEM|POS:N|LEM:jin~ap|ROOT:jnn
  28 │     6      3      1  wa          CONJ    PREFIX|w:CONJ+                 ⋯
  29 │     6      3      2  {l          DET     PREFIX|Al+
  30 │     6      3      3  n~aAsi      N       STEM|POS:N|LEM:n~aAs|ROOT:nws|
                                                    1 column and 15 rows omitted
julia> ikhlas_vrs = verses(ikhlas)
6-element Vector{String}:
 "qulo >aEuw*u birab~i {ln~aAsi"
 "maliki {ln~aAsi"
 "<ila`hi {ln~aAsi"
 "min \$ar~i {lowasowaAsi {loxan~aAsi"
 "{l~a*iY yuwasowisu fiY Suduwri {ln~aAsi"
 "mina {lojin~api wa{ln~aAsi"
julia> ikhlas_nrm = normalize.(ikhlas_vrs; isarabic=false)
6-element Vector{String}:
 "qulo AaEuw*u birab~i Aln~aAsi"
 "maliki Aln~aAsi"
 "AilaAhi Aln~aAsi"
 "min \$ar~i AlowasowaAsi Aloxan~aAsi"
 "Al~a*iy yuwasowisu fiy Suduwri Aln~aAsi"
 "mina Alojin~ahi waAln~aAsi"
julia> arabic.(ikhlas_nrm)
6-element Vector{String}:
 "قُلْ اَعُوذُ بِرَبِّ النَّاسِ"
 "مَلِكِ النَّاسِ"
 "اِلَاهِ النَّاسِ"
 "مِن شَرِّ الْوَسْوَاسِ الْخَنَّاسِ"
 "الَّذِي يُوَسْوِسُ فِي صُدُورِ النَّاسِ"
 "مِنَ الْجِنَّهِ وَالنَّاسِ"

Dediacritization

julia> ikhlas_ddc = dediac.(ikhlas_vrs; isarabic=false)
6-element Vector{String}:
 "ql >Ew* brb {lnAs"
 "mlk {lnAs"
 "<lh {lnAs"
 "mn \$r {lwswAs {lxnAs"
 "{l*Y ywsws fY Sdwr {lnAs"
 "mn {ljnp w{lnAs"
julia> arabic.(ikhlas_ddc)
6-element Vector{String}:
 "قل أعوذ برب ٱلناس"
 "ملك ٱلناس"
 "إله ٱلناس"
 "من شر ٱلوسواس ٱلخناس"
 "ٱلذى يوسوس فى صدور ٱلناس"
 "من ٱلجنة وٱلناس"

Transliteration

julia> ar_ikhlas = verses(tnzldata[114])
6-element Vector{String}:
 "بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيمِ قُلْ أَعُوذُ بِرَبِّ ٱلنَّاسِ"
 "مَلِكِ ٱلنَّاسِ"
 "إِلَٰهِ ٱلنَّاسِ"
 "مِن شَرِّ ٱلْوَسْوَاسِ ٱلْخَنَّاسِ"
 "ٱلَّذِى يُوَسْوِسُ فِى صُدُورِ ٱلنَّاسِ"
 "مِنَ ٱلْجِنَّةِ وَٱلنَّاسِ"
julia> encode.(ar_ikhlas)
6-element Vector{String}:
 "bisomi {ll~ahi {lr~aHoma`ni {lr~aHiymi qulo >aEuw*u birab~i {ln~aAsi"
 "maliki {ln~aAsi"
 "<ila`hi {ln~aAsi"
 "min \$ar~i {lowasowaAsi {loxan~aAsi"
 "{l~a*iY yuwasowisu fiY Suduwri {ln~aAsi"
 "mina {lojin~api wa{ln~aAsi"