Morphological Analysis

One of the features of CAMeL Tools is its token analyzer. In the following example, we will analyze the first word of the basmalah, which is stored in avrs1. Load the data as follows:

julia> using QuranTree

julia> crps, tnzl = load(QuranData());

julia> crpsdata = table(crps);

julia> tnzldata = table(tnzl);

julia> avrs1 = verses(tnzldata[1][1])[1]
"بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيمِ"

julia> dediac(avrs1)
"بسم ٱلله ٱلرحمٰن ٱلرحيم"

To analyze the morphological features of the first word of the basmalah, run the following:

julia> using Pkg

julia> Pkg.add("PyCall")
  Resolving package versions...
No Changes to `~/work/QuranTree.jl/QuranTree.jl/docs/Project.toml`
No Changes to `~/work/QuranTree.jl/QuranTree.jl/docs/Manifest.toml`

julia> using PyCall

julia> using JuliaDB

julia> using PrettyTables

julia> @ptconf vcrop_mode=:middle tf=tf_compact

julia> @pyimport camel_tools.morphology.database as camel_database

julia> @pyimport camel_tools.morphology.analyzer as camel_analyzer

julia> db = camel_database.MorphologyDB.builtin_db()
PyObject <camel_tools.morphology.database.MorphologyDB object at 0x7fc56037b310>

julia> analyzer = camel_analyzer.Analyzer(db)
PyObject <camel_tools.morphology.analyzer.Analyzer object at 0x7fc573cb57c0>

julia> analyses = analyzer.analyze(split(avrs1)[1])
11-element Array{Dict{Any,Any},1}:
 Dict("form_num" => "s","root" => "ب.س.م","prc1" => "0","pos_lex_logprob" => -99.0,"vox" => "na","diac" => "بَسْم","cas" => "u","bw" => "بَسْم/NOUN","ud" => "NOUN","rat" => "i"…)
 Dict("form_num" => "s","root" => "ب.س.م","prc1" => "0","pos_lex_logprob" => -99.0,"vox" => "na","diac" => "بَسْمِ","cas" => "g","bw" => "بَسْم/NOUN+ِ/CASE_DEF_GEN","ud" => "NOUN","rat" => "i"…)
 Dict("form_num" => "s","root" => "ب.س.م","prc1" => "0","pos_lex_logprob" => -99.0,"vox" => "na","diac" => "بَسْمَ","cas" => "a","bw" => "بَسْم/NOUN+َ/CASE_DEF_ACC","ud" => "NOUN","rat" => "i"…)
 Dict("form_num" => "s","root" => "ب.س.م","prc1" => "0","pos_lex_logprob" => -99.0,"vox" => "na","diac" => "بَسْمُ","cas" => "n","bw" => "بَسْم/NOUN+ُ/CASE_DEF_NOM","ud" => "NOUN","rat" => "i"…)
 Dict("form_num" => "s","root" => "ب.س.م","prc1" => "0","pos_lex_logprob" => -99.0,"vox" => "na","diac" => "بَسْمٌ","cas" => "n","bw" => "بَسْم/NOUN+ٌ/CASE_INDEF_NOM","ud" => "NOUN","rat" => "i"…)
 Dict("form_num" => "s","root" => "ب.س.م","prc1" => "0","pos_lex_logprob" => -99.0,"vox" => "na","diac" => "بَسْمٍ","cas" => "g","bw" => "بَسْم/NOUN+ٍ/CASE_INDEF_GEN","ud" => "NOUN","rat" => "i"…)
 Dict("form_num" => "-","root" => "بسم","prc1" => "bi_prep","pos_lex_logprob" => -99.0,"vox" => "na","diac" => "بِسْمِ","cas" => "u","bw" => "بِ/PREP+سْمِ/NOUN","num" => "-","rat" => "y"…)
 Dict("form_num" => "s","root" => "ب.س.م","prc1" => "0","pos_lex_logprob" => -99.0,"vox" => "a","diac" => "بَسَمَ","cas" => "na","bw" => "بَسَم/PV+َ/PVSUFF_SUBJ:3MS","ud" => "VERB","rat" => "n"…)
 Dict("form_num" => "s","root" => "س.م.م","prc1" => "bi_prep","pos_lex_logprob" => -5.002611,"vox" => "na","diac" => "بِسَمّ","cas" => "u","bw" => "بِ/PREP+سَمّ/NOUN","ud" => "ADP+NOUN","rat" => "i"…)
 Dict("form_num" => "s","root" => "س.م.م","prc1" => "bi_prep","pos_lex_logprob" => -5.002611,"vox" => "na","diac" => "بِسَمِّ","cas" => "g","bw" => "بِ/PREP+سَمّ/NOUN+ِ/CASE_DEF_GEN","ud" => "ADP+NOUN","rat" => "i"…)
 Dict("form_num" => "s","root" => "س.م.م","prc1" => "bi_prep","pos_lex_logprob" => -5.002611,"vox" => "na","diac" => "بِسَمٍّ","cas" => "g","bw" => "بِ/PREP+سَمّ/NOUN+ٍ/CASE_INDEF_GEN","ud" => "ADP+NOUN","rat" => "i"…)

julia> tbl = table([(;Dict(Symbol.(keys(d)) .=> collect(values(d)))...) for d in analyses])
Table with 11 rows, 42 columns:
Columns:
#   colname          type
────────────────────────────
1   vox              String
2   lex_logprob      Float64
3   caphi            String
4   lex              String
5   d2seg            String
6   prc1             String
7   prc3             String
8   atbtok           String
9   stemcat          String
10  enc0             String
11  pos_lex_logprob  Float64
12  pattern          String
13  d3seg            String
14  mod              String
15  pos_logprob      Float64
16  prc2             String
17  catib6           String
18  bwtok            String
19  d2tok            String
20  rat              String
21  d1tok            String
22  stemgloss        String
23  d1seg            String
24  stem             String
25  pos              String
26  diac             String
27  bw               String
28  gen              String
29  ud               String
30  form_num         String
31  stt              String
32  asp              String
33  gloss            String
34  d3tok            String
35  num              String
36  root             String
37  cas              String
38  prc0             String
39  atbseg           String
40  source           String
41  form_gen         String
42  per              String

julia> @pt tbl
 -------- ------------- ----------------- --------- -------- --------- ---------
     vox   lex_logprob             caphi       lex    d2seg      prc1     prc3 ⋯
  String       Float64            String    String   String    String   String ⋯
 -------- ------------- ----------------- --------- -------- --------- ---------
      na         -99.0           b_a_s_m     بَسْم_1      بَسْم         0        0 ⋯
      na         -99.0         b_a_s_m_i     بَسْم_1      بَسْمِ         0        0 ⋯
      na         -99.0         b_a_s_m_a     بَسْم_1      بَسْمَ         0        0 ⋯
      na         -99.0         b_a_s_m_u     بَسْم_1      بَسْمُ         0        0 ⋯
      na         -99.0       b_a_s_m_u_n     بَسْم_1      بَسْمٌ         0        0 ⋯
      na         -99.0       b_a_s_m_i_n     بَسْم_1      بَسْمٍ         0        0 ⋯
      na         -99.0         b_i_s_m_i     بِسْمِ_1      بِسْمِ   bi_prep        0 ⋯
       a         -99.0       b_a_s_a_m_a   بَسَم-i_1      بَسَمَ         0        0 ⋯
      na      -5.00261       b_i_s_a_m_m      سَمّ_1    بِ+_سَمّ   bi_prep        0 ⋯
      na      -5.00261     b_i_s_a_m_m_i      سَمّ_1    بِ+_سَمِّ   bi_prep        0 ⋯
      na      -5.00261   b_i_s_a_m_m_i_n      سَمّ_1    بِ+_سَمٍّ   bi_prep        0 ⋯
 -------- ------------- ----------------- --------- -------- --------- ---------
                                                              35 columns omitted
Note

To run the code above successfully, you need to install JuliaDB.jl and PrettyTables.jl:

using Pkg
Pkg.add("JuliaDB")
Pkg.add("PrettyTables")

The following is the complete table (all 42 columns) for the output above, properly formatted in HTML.

Pkg.add("DataFrames")
Pkg.add("IterableTables")
Pkg.add("Latexify")
using DataFrames: DataFrame
using IterableTables
using Latexify

mdtable(DataFrame(tbl), latex=false)
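vox | lex_logprob | caphi | lex | d2seg | prc1 | prc3 | atbtok | stemcat | enc0 | pos_lex_logprob | pattern | d3seg | mod | pos_logprob | prc2 | catib6 | bwtok | d2tok | rat | d1tok | stemgloss | d1seg | stem | pos | diac | bw | gen | ud | form_num | stt | asp | gloss | d3tok | num | root | cas | prc0 | atbseg | source | form_gen | per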
na-99.0bas_mبَسْم_1بَسْم00بَسْمN0-99.01َ2ْ3بَسْمna-0.43442330NOMبَسْمبَسْمiبَسْمsmilingبَسْمبَسْمnounبَسْمبَسْم/NOUNmNOUNsinasmilingبَسْمsب.س.مu0بَسْمlexmna
na-99.0basmiبَسْم_1بَسْمِ00بَسْمِN0-99.01َ2ْ3ِبَسْمِna-0.43442330NOMبَسْم_+ِبَسْمِiبَسْمِsmilingبَسْمِبَسْمnounبَسْمِبَسْم/NOUN+ِ/CASEDEFGENmNOUNscnasmiling+[def.gen.]بَسْمِsب.س.مg0بَسْمِlexmna
na-99.0basmaبَسْم_1بَسْمَ00بَسْمَN0-99.01َ2ْ3َبَسْمَna-0.43442330NOMبَسْم_+َبَسْمَiبَسْمَsmilingبَسْمَبَسْمnounبَسْمَبَسْم/NOUN+َ/CASEDEFACCmNOUNscnasmiling+[def.acc.]بَسْمَsب.س.مa0بَسْمَlexmna
na-99.0basmuبَسْم_1بَسْمُ00بَسْمُN0-99.01َ2ْ3ُبَسْمُna-0.43442330NOMبَسْم_+ُبَسْمُiبَسْمُsmilingبَسْمُبَسْمnounبَسْمُبَسْم/NOUN+ُ/CASEDEFNOMmNOUNscnasmiling+[def.nom.]بَسْمُsب.س.مn0بَسْمُlexmna
na-99.0basmu_nبَسْم_1بَسْمٌ00بَسْمٌN0-99.01َ2ْ3ٌبَسْمٌna-0.43442330NOMبَسْم_+ٌبَسْمٌiبَسْمٌsmilingبَسْمٌبَسْمnounبَسْمٌبَسْم/NOUN+ٌ/CASEINDEFNOMmNOUNsinasmiling+[indef.nom.]بَسْمٌsب.س.مn0بَسْمٌlexmna
na-99.0basmi_nبَسْم_1بَسْمٍ00بَسْمٍN0-99.01َ2ْ3ٍبَسْمٍna-0.43442330NOMبَسْم_+ٍبَسْمٍiبَسْمٍsmilingبَسْمٍبَسْمnounبَسْمٍبَسْم/NOUN+ٍ/CASEINDEFGENmNOUNsinasmiling+[indef.gen.]بَسْمٍsب.س.مg0بَسْمٍlexmna
na-99.0bismiبِسْمِ_1بِسْمِbi_prep0بِسْمِFW-Wa0-99.01ِ2ْ3ِبِسْمِna-99.00بِسْمِبِسْمِyبِسْمِin;by+(the)Nameofبِسْمِبِسْمِnounبِسْمِبِ/PREP+سْمِ/NOUN--inain;by+(the)Nameofبِسْمِ-بسمu0بِسْمِlex-na
a-99.0basam_aبَسَم-i_1بَسَمَ00بَسَمَPV0-99.01َ2َ3َبَسَمَi-1.0232080VRBبَسَم_+َبَسَمَnبَسَمَsmileبَسَمَبَسَمverbبَسَمَبَسَم/PV+َ/PVSUFF_SUBJ:3MSmVERBsnapsmile+he;it_<verb>بَسَمَsب.س.مna0بَسَمَlexm3
na-5.002611bisam_mسَمّ_1بِ+_سَمّbi_prep0بِ+_سَمّN0-5.002611بِ1َ2ّبِ+_سَمّna-0.43442330PRT+NOMبِ+_سَمّبِ+_سَمّiبِسَمّpoisonبِسَمّسَمّnounبِسَمّبِ/PREP+سَمّ/NOUNmADP+NOUNsinaby;with+poisonبِ+_سَمّsس.م.مu0بِ+_سَمّlexmna
na-5.002611bisammiسَمّ_1بِ+_سَمِّbi_prep0بِ+_سَمِّN0-5.002611بِ1َ2ِّبِ+_سَمِّna-0.43442330PRT+NOMبِ+سَمّبِ+_سَمِّiبِسَمِّpoisonبِسَمِّسَمّnounبِسَمِّبِ/PREP+سَمّ/NOUN+ِ/CASEDEFGENmADP+NOUNscnaby;with+poison+[def.gen.]بِ+_سَمِّsس.م.مg0بِ+_سَمِّlexmna
na-5.002611bisammi_nسَمّ_1بِ+_سَمٍّbi_prep0بِ+_سَمٍّN0-5.002611بِ1َ2ٍّبِ+_سَمٍّna-0.43442330PRT+NOMبِ+سَمّبِ+_سَمٍّiبِسَمٍّpoisonبِسَمٍّسَمّnounبِسَمٍّبِ/PREP+سَمّ/NOUN+ٍ/CASEINDEFGENmADP+NOUNsinaby;with+poison+[indef.gen.]بِ+_سَمٍّsس.م.مg0بِ+_سَمٍّlexmna