From 767c7f65f36a8307f1c4bd001adac39011291229 Mon Sep 17 00:00:00 2001 From: Avik Sengupta Date: Sun, 8 Nov 2020 20:52:55 +0000 Subject: [PATCH 01/23] move ci to github actions --- .github/workflows/ci.yml | 61 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..b46410b --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,61 @@ +name: CI +on: + push: + branches: + - master + tags: '*' + pull_request: +jobs: + test: + name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} + runs-on: ${{ matrix.os }} + continue-on-error: ${{ matrix.version == 'nightly' }} + strategy: + matrix: + version: + - '1.3' + - '1' + - 'nightly' + os: + - ubuntu-latest + - macOS-latest + - windows-latest + arch: + - x86 + - x64 + exclude: + # Remove some configurations from the build matrix to reduce CI time. + # See https://github.com/marketplace/actions/setup-julia-environment + # MacOS not available on x86 + - {os: 'macOS-latest', arch: 'x86'} + # Don't test on all versions + - {os: 'macOS-latest', version: '1.3'} + - {os: 'macOS-latest', version: 'nightly'} + - {os: 'windows-latest', version: '1.3'} + - {os: 'windows-latest', version: 'nightly'} + - {os: 'windows-latest', arch: 'x86'} + steps: + - uses: actions/checkout@v1 + - uses: julia-actions/setup-julia@latest + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: julia-actions/julia-buildpkg@latest + - uses: julia-actions/julia-runtest@latest + with: + coverage: false + docs: + name: Documentation + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + - uses: julia-actions/setup-julia@latest + with: + version: '1.5' + - run: julia --project=docs -e ' + using Pkg; + Pkg.develop(PackageSpec(; path=pwd())); + Pkg.instantiate();' + - run: julia --project=docs docs/make.jl + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From ec410a3748a671d4a401857753d4c4350c5aa252 Mon Sep 17 00:00:00 2001 From: Avik Sengupta Date: Sun, 8 Nov 2020 20:59:27 +0000 Subject: [PATCH 02/23] set DATADEPS env --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b46410b..70ce92e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,6 +42,8 @@ jobs: arch: ${{ matrix.arch }} - uses: julia-actions/julia-buildpkg@latest - uses: julia-actions/julia-runtest@latest + env: + DATADEPS_ALWAYS_ACCEPT: true with: coverage: false docs: From 1c325730d66e4f95cb1abce52b9d6d0dc837134b Mon Sep 17 00:00:00 2001 From: Avik Sengupta Date: Sun, 8 Nov 2020 21:00:25 +0000 Subject: [PATCH 03/23] Goodbye Travis. Thanks for all the fish. 
--- .travis.yml | 30 ------------------------------ 1 file changed, 30 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index bf028c9..0000000 --- a/.travis.yml +++ /dev/null @@ -1,30 +0,0 @@ -language: julia -os: - - linux - - osx - - windows -env: - - DATADEPS_ALWAYS_ACCEPT=true -julia: - - 1.3 - - 1 - - nightly -matrix: - allow_failures: - - julia: nightly - exclude: - - os: osx - julia: 1.3 - - os: windows - julia: 1.3 - - os: osx - julia: nightly - - os: windows - julia: nightly - fast_finish: true -branches: - only: - - master - - /release-.*/ -notifications: - email: false From e6a7833c8c9643e214fc688142d55e5518f04af1 Mon Sep 17 00:00:00 2001 From: Avik Sengupta Date: Sun, 8 Nov 2020 21:00:49 +0000 Subject: [PATCH 04/23] docs typos --- docs/make.jl | 2 +- docs/src/index.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/make.jl b/docs/make.jl index 5876f79..06b376f 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -2,7 +2,7 @@ using Documenter, TextModels makedocs( modules = [TextModels], - sitename = "TextAnalysis", + sitename = "TextModels", format = Documenter.HTML( ), pages = [ diff --git a/docs/src/index.md b/docs/src/index.md index 2168e45..8c36217 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -2,6 +2,8 @@ The TextModels package enhances the TextAnalysis package with end-user focussed, practical natural language models, typically based on neural networks (in this case, [Flux](https://fluxml.ai/)) +This package depends on the [TextAnalysis](https://github.com/JuliaText/TextAnalysis.jl) package, which contains basic algorithms to deal with textual documetns. + ## Installation The TextModels package can be installed using Julia's package manager: From f2da616e0d049cfc4d51ea61c3fb9a1b91136175 Mon Sep 17 00:00:00 2001 From: Avik Sengupta Date: Tue, 17 Nov 2020 20:49:28 +0000 Subject: [PATCH 05/23] reduce testing on x86 --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 70ce92e..67b437d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,6 +34,8 @@ jobs: - {os: 'windows-latest', version: '1.3'} - {os: 'windows-latest', version: 'nightly'} - {os: 'windows-latest', arch: 'x86'} + - {arch: 'x86', version: '1.3'} + - {arch: 'x86', version: 'nightly'} steps: - uses: actions/checkout@v1 - uses: julia-actions/setup-julia@latest From 42a0e06a8440c9c0f32e8ed11ed9709bf7692ddd Mon Sep 17 00:00:00 2001 From: Adarshkumar712 Date: Wed, 28 Apr 2021 22:18:55 -0700 Subject: [PATCH 06/23] Update crf, ner, pos --- Project.toml | 35 ++++++++++++--------------- src/CRF/crf.jl | 4 ++-- src/CRF/loss.jl | 4 ++-- src/CRF/predict.jl | 6 ++--- src/TextModels.jl | 42 ++++++++++++++++----------------- src/sequence/pos.jl | 2 +- src/sequence/sequence_models.jl | 32 ++++++++++++------------- test/crf.jl | 17 ++++++------- test/runtests.jl | 2 +- 9 files changed, 70 insertions(+), 74 deletions(-) diff --git a/Project.toml b/Project.toml index 4687488..7d6b695 100644 --- a/Project.toml +++ b/Project.toml @@ -2,40 +2,35 @@ name = "TextModels" uuid = "77b9cbda-2a23-51df-82a3-24144d1cd378" license = "MIT" desc = "Practical Neural Network based models for Natural Language Processing" -version = "0.1.0" +version = "0.1.1" [deps] BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" DataDeps = "124859b0-ceae-595e-8997-d05f6a7a8dfe" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" -DelimitedFiles = 
"8bb1440f-4735-579b-a4ab-409b98df4dab" Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" Languages = "8ef0a80b-9436-5d2c-a485-80b904378c43" -LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" -Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" -Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TextAnalysis = "a2db99b7-8b79-58f8-94bf-bbc811eef33d" -Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" WordTokenizers = "796a5d58-b03d-544a-977e-18100b691f6e" +Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] -BSON = "0.2.5" -DataDeps = "0.7" -DataStructures = "0.17, 0.18" -Flux = "0.9" -JSON = "0.21" -Languages = "0.4" -NNlib = "0.6, 0.7" -StatsBase = "0.33" -TextAnalysis = "0.7" -Tracker = "0.2" -WordTokenizers = "0.5" -julia = "1.3" +BSON = "0.3.3" +DataDeps = "0.7.7" +DataStructures = "0.18.9" +Flux = "0.12.2" +JSON = "0.21.1" +Languages = "0.4.3" +NNlib = "0.7" +StatsBase = "0.33.6" +TextAnalysis = "0.7.3" +WordTokenizers = "0.5.6" +Zygote = "0.6.10" +julia = "1.6" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/src/CRF/crf.jl b/src/CRF/crf.jl index 98ffc23..3145d89 100644 --- a/src/CRF/crf.jl +++ b/src/CRF/crf.jl @@ -22,10 +22,10 @@ function CRF(n::Integer) W[:, n + 1] .= -10000 W[n + 2, :] .= -10000 - return CRF(param(W), n) + return CRF(W, n) end -@treelike CRF +@functor CRF function Base.show(io::IO, c::CRF) print(io, "CRF with ", c.n + 2, " distinct tags (including START and STOP tags).") diff --git a/src/CRF/loss.jl b/src/CRF/loss.jl index 495816d..c1405fd 100644 --- a/src/CRF/loss.jl +++ b/src/CRF/loss.jl @@ -5,13 +5,13 @@ Compute the Normalization / partition function or the Forward Algorithm score - `Z` """ function forward_score(c::CRF, x, init_α) - forward_var = log_sum_exp((c.W .+ x[1]') .+ init_α) + forward_var = log_sum_exp((c.W .+ (x[1]') .+ init_α)) for i in 2:length(x) forward_var = log_sum_exp((c.W .+ x[i]') .+ forward_var') end - return log_sum_exp(c.W[:, c.n + 2] + forward_var')[1] + return log_sum_exp(c.W[:, c.n + 2] .+ forward_var')[1] end """ diff --git a/src/CRF/predict.jl b/src/CRF/predict.jl index 29e2c34..3225b70 100644 --- a/src/CRF/predict.jl +++ b/src/CRF/predict.jl @@ -35,14 +35,14 @@ Computes the forward pass for viterbi algorithm. 
function _decode(c::CRF, x, init_vit_vars) α_idx = zeros(Int, c.n + 2, length(x)) - forward_var, α_idx[:, 1] = forward_pass_unit(Tracker.data((c.W .+ x[1]') .+ init_vit_vars)) + forward_var, α_idx[:, 1] = forward_pass_unit((c.W .+ x[1]') .+ init_vit_vars) for i in 2:length(x) - forward_var, α_idx[:, i] = forward_pass_unit(Tracker.data((c.W .+ x[i]') .+ forward_var')) + forward_var, α_idx[:, i] = forward_pass_unit((c.W .+ x[i]') .+ forward_var') end labels = zeros(Int, length(x)) - labels[end] = argmax(forward_var + Tracker.data(c.W[:, c.n + 2])')[2] + labels[end] = argmax(forward_var + (c.W[:, c.n + 2])')[2] for i in reverse(2:length(x)) labels[i - 1] = α_idx[labels[i], i] diff --git a/src/TextModels.jl b/src/TextModels.jl index a82ec68..79d6223 100644 --- a/src/TextModels.jl +++ b/src/TextModels.jl @@ -7,8 +7,8 @@ module TextModels using Pkg.Artifacts - using Flux, Tracker - using Flux: identity, onehot, onecold, @treelike, onehotbatch + using Flux, Zygote + using Flux: identity, onehot, onecold, @functor, onehotbatch using TextAnalysis @@ -36,31 +36,31 @@ module TextModels include("sequence/pos_datadeps.jl") include("sequence/pos.jl") include("sequence/sequence_models.jl") - - + + # ULMFiT - module ULMFiT - using ..TextAnalysis - using DataDeps - using Flux - using Tracker - using BSON - include("ULMFiT/utils.jl") - include("ULMFiT/datadeps.jl") - include("ULMFiT/data_loaders.jl") - include("ULMFiT/custom_layers.jl") - include("ULMFiT/pretrain_lm.jl") - include("ULMFiT/fine_tune_lm.jl") - include("ULMFiT/train_text_classifier.jl") - end - export ULMFiT + #module ULMFiT + # using ..TextAnalysis + # using DataDeps + # using Flux + # using Tracker + # using BSON + # include("ULMFiT/utils.jl") + # include("ULMFiT/datadeps.jl") + # include("ULMFiT/data_loaders.jl") + # include("ULMFiT/custom_layers.jl") + # include("ULMFiT/pretrain_lm.jl") + # include("ULMFiT/fine_tune_lm.jl") + # include("ULMFiT/train_text_classifier.jl") + #end + #export ULMFiT function __init__() pos_tagger_datadep_register() ner_datadep_register() pos_datadep_register() - ULMFiT.ulmfit_datadep_register() - + #ULMFiT.ulmfit_datadep_register() + global sentiment_model = artifact"sentiment_model" end end diff --git a/src/sequence/pos.jl b/src/sequence/pos.jl index 9346a3a..b23c210 100644 --- a/src/sequence/pos.jl +++ b/src/sequence/pos.jl @@ -1,4 +1,4 @@ -using BSON, Tracker +using BSON const PoSCharUNK = '¿' const PoSWordUNK = "" diff --git a/src/sequence/sequence_models.jl b/src/sequence/sequence_models.jl index b19e6a0..8c8a6df 100644 --- a/src/sequence/sequence_models.jl +++ b/src/sequence/sequence_models.jl @@ -1,4 +1,4 @@ -using BSON, Tracker +using BSON mutable struct BiLSTM_CNN_CRF_Model{C, W, L, D, O, A} labels::Array{String, 1} # List of Labels chars_idx#::Dict{Char, Integer} # Dict that maps chars to indices in W_Char_Embed @@ -33,32 +33,32 @@ function BiLSTM_CNN_CRF_Model(labels, chars_idx, words_idx, UNK_char_idx,UNK_Wor init_α[n + 1] = 0 # Word and Character Embeddings. 
- W_word_Embed = BSON.load(joinpath(weights_path, "W_word_cpu.bson"))[:W_word_cpu] - W_Char_Embed = BSON.load(joinpath(weights_path, "W_char_cpu.bson"))[:W_char_cpu] + W_word_Embed = BSON.load(joinpath(weights_path, "W_word_cpu.bson"))[:W_word_cpu][:, 1:end-1] # no padding char token here + W_Char_Embed = BSON.load(joinpath(weights_path, "W_char_cpu.bson"))[:W_char_cpu][:, 1:end-1] # no padding word token here # Forward_LSTM forward_wts = BSON.load(joinpath(weights_path, "forward_lstm.bson")) forward_lstm = Flux.Recur(Flux.LSTMCell(forward_wts[:lstm_2], # Wi forward_wts[:lstm_1], # Wh forward_wts[:lstm_3], # b - forward_wts[:lstm_4], # h - forward_wts[:lstm_5] # c + (reshape(forward_wts[:lstm_4], length(forward_wts[:lstm_4]), 1), # h + reshape(forward_wts[:lstm_5], length(forward_wts[:lstm_5]), 1)) # c ), - forward_wts[:lstm_init], - forward_wts[:lstm_state] - ) + (reshape(forward_wts[:lstm_state][1], length(forward_wts[:lstm_state][1]), 1), # h + reshape(forward_wts[:lstm_state][2], length(forward_wts[:lstm_state][2]), 1)) + ) # Backward_LSTM backward_wts = BSON.load(joinpath(weights_path, "backward_lstm.bson")) backward = Flux.Recur(Flux.LSTMCell(backward_wts[:lstm_2], # Wi backward_wts[:lstm_1], # Wh backward_wts[:lstm_3], # b - backward_wts[:lstm_4], # h - backward_wts[:lstm_5] # c - ), - backward_wts[:lstm_init], - backward_wts[:lstm_state] - ) + (reshape(backward_wts[:lstm_4], length(backward_wts[:lstm_4]), 1), # h + reshape(backward_wts[:lstm_5], length(backward_wts[:lstm_5]), 1)) # c + ), + (reshape(backward_wts[:lstm_state][1], length(backward_wts[:lstm_state][1]), 1), # h + reshape(backward_wts[:lstm_state][2], length(backward_wts[:lstm_state][2]), 1)) + ) # Dense d_weights_bias = BSON.load(joinpath(weights_path, "d_cpu.bson")) @@ -69,7 +69,7 @@ function BiLSTM_CNN_CRF_Model(labels, chars_idx, words_idx, UNK_char_idx,UNK_Wor # Load CRF. 
crf_wt = BSON.load(joinpath(weights_path, "crf_cpu.bson"))[:crf_Weights] - c = TextModels.CRF(crf_wt, size(crf_wt)[1] - 2) + c = CRF(crf_wt, size(crf_wt)[1] - 2) # Load Conv conv_wt_bias = BSON.load(joinpath(weights_path, "conv_cpu.bson")) @@ -100,7 +100,7 @@ function (a::BiLSTM_CNN_CRF_Model)(x) oh_outs = viterbi_decode(a.c, m(x), a.init_α) Flux.reset!(a.backward) Flux.reset!(a.forward_lstm) - [a.labels[oh.ix] for oh in oh_outs] + [a.labels[oh.indices] for oh in oh_outs] end onehotinput(m::BiLSTM_CNN_CRF_Model, word) = (onehot(get(m.words_idx, lowercase(word), m.UNK_Word_idx), 1:length(m.words_idx)), diff --git a/test/crf.jl b/test/crf.jl index 34237d2..d88e32e 100644 --- a/test/crf.jl +++ b/test/crf.jl @@ -1,5 +1,6 @@ using Flux -using Flux: gradient, LSTM, Dense, reset!, onehot, RNN +using Flux: LSTM, Dense, reset!, onehot, RNN +using Zygote: gradient using TextModels: score_sequence, forward_score @testset "crf" begin @@ -118,7 +119,7 @@ using TextModels: score_sequence, forward_score function train() for d in data reset!(lstm) - grads = Tracker.gradient(() -> loss(d[1], d[2]), ps) + grads = gradient(() -> loss(d[1], d[2]), ps) Flux.Optimise.update!(opt, ps, grads) end end @@ -129,17 +130,17 @@ using TextModels: score_sequence, forward_score end to_sum = [find_loss(d) for d in data] l1 = sum(to_sum) - dense_param_1 = deepcopy(Tracker.data(d_out.W)) - lstm_param_1 = deepcopy(Tracker.data(lstm.cell.Wh)) - crf_param_1 = deepcopy(Tracker.data(c.W)) + dense_param_1 = deepcopy(d_out.W) + lstm_param_1 = deepcopy(lstm.cell.Wh) + crf_param_1 = deepcopy(c.W) for i in 1:10 train() end - dense_param_2 = deepcopy(Tracker.data(d_out.W)) - lstm_param_2 = deepcopy(Tracker.data(lstm.cell.Wh)) - crf_param_2 = deepcopy(Tracker.data(c.W)) + dense_param_2 = deepcopy(d_out.W)) + lstm_param_2 = deepcopy(lstm.cell.Wh) + crf_param_2 = deepcopy(c.W) l2 = sum([find_loss(d) for d in data]) @test l1 > l2 diff --git a/test/runtests.jl b/test/runtests.jl index 1bcac94..1221b31 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,5 +8,5 @@ include("crf.jl") include("ner.jl") include("pos.jl") include("averagePerceptronTagger.jl") -include("ulmfit.jl") +#include("ulmfit.jl") include("sentiment.jl") From 3b614e07682d8c9ec1b6916f15a3238e09a98cdc Mon Sep 17 00:00:00 2001 From: Adarshkumar712 Date: Fri, 30 Apr 2021 09:52:24 -0700 Subject: [PATCH 07/23] Update ULMFiT model fix errors in training Correction in code for Text Classifier Remove gpu erro --- Project.toml | 4 +++ src/TextModels.jl | 33 ++++++++--------- src/ULMFiT/custom_layers.jl | 55 +++++++++++++++++------------ src/ULMFiT/data_loaders.jl | 32 ++++++++--------- src/ULMFiT/fine_tune_lm.jl | 26 ++++++-------- src/ULMFiT/pretrain_lm.jl | 20 +++++------ src/ULMFiT/sentiment.jl | 4 +-- src/ULMFiT/train_text_classifier.jl | 48 +++++++++++++++++-------- src/ULMFiT/utils.jl | 4 +-- test/ulmfit.jl | 6 ++-- 10 files changed, 129 insertions(+), 103 deletions(-) diff --git a/Project.toml b/Project.toml index 7d6b695..df1e03f 100644 --- a/Project.toml +++ b/Project.toml @@ -6,12 +6,16 @@ version = "0.1.1" [deps] BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +CorpusLoaders = "214a0ac2-f95b-54f7-a80b-442ed9c2c9e8" DataDeps = "124859b0-ceae-595e-8997-d05f6a7a8dfe" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" Languages = "8ef0a80b-9436-5d2c-a485-80b904378c43" NNlib = 
"872c559c-99b0-510c-b3b7-b6c96a88d5cd" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TextAnalysis = "a2db99b7-8b79-58f8-94bf-bbc811eef33d" diff --git a/src/TextModels.jl b/src/TextModels.jl index 79d6223..f437cb1 100644 --- a/src/TextModels.jl +++ b/src/TextModels.jl @@ -39,27 +39,28 @@ module TextModels # ULMFiT - #module ULMFiT - # using ..TextAnalysis - # using DataDeps - # using Flux - # using Tracker - # using BSON - # include("ULMFiT/utils.jl") - # include("ULMFiT/datadeps.jl") - # include("ULMFiT/data_loaders.jl") - # include("ULMFiT/custom_layers.jl") - # include("ULMFiT/pretrain_lm.jl") - # include("ULMFiT/fine_tune_lm.jl") - # include("ULMFiT/train_text_classifier.jl") - #end - #export ULMFiT + module ULMFiT + using TextAnalysis + using DataDeps + using Flux + using Zygote + using BSON + using CorpusLoaders + include("ULMFiT/utils.jl") + include("ULMFiT/datadeps.jl") + include("ULMFiT/data_loaders.jl") + include("ULMFiT/custom_layers.jl") + include("ULMFiT/pretrain_lm.jl") + include("ULMFiT/fine_tune_lm.jl") + include("ULMFiT/train_text_classifier.jl") + end + export ULMFiT function __init__() pos_tagger_datadep_register() ner_datadep_register() pos_datadep_register() - #ULMFiT.ulmfit_datadep_register() + ULMFiT.ulmfit_datadep_register() global sentiment_model = artifact"sentiment_model" end diff --git a/src/ULMFiT/custom_layers.jl b/src/ULMFiT/custom_layers.jl index e402c7d..ad6e906 100644 --- a/src/ULMFiT/custom_layers.jl +++ b/src/ULMFiT/custom_layers.jl @@ -8,7 +8,7 @@ This file contains the custom layers defined for this model: PooledDense """ -import Flux: gate, _testmode!, _dropout_kernel +import Flux: gate, testmode!, _dropout_kernel reset_masks!(entity) = nothing reset_probability!(entity) = nothing @@ -44,12 +44,12 @@ Moreover this also follows the Vartional DropOut citeria, that is, the drop mask is remains same for a whole training pass. This is done by saving the masks in 'maskWi' and 'maskWh' fields """ -mutable struct WeightDroppedLSTMCell{A, V, M} +mutable struct WeightDroppedLSTMCell{A, V, S, M} Wi::A Wh::A b::V - h::V - c::V + h::S + c::S p::Float64 maskWi::M maskWh::M @@ -60,17 +60,17 @@ function WeightDroppedLSTMCell(in::Integer, out::Integer, p::Float64=0.0; init = Flux.glorot_uniform) @assert 0 ≤ p ≤ 1 cell = WeightDroppedLSTMCell( - param(init(out*4, in)), - param(init(out*4, out)), - param(init(out*4)), - param(zeros(Float32, out)), - param(zeros(Float32, out)), + init(out*4, in), + init(out*4, out), + init(out*4), + reshape(zeros(Float32, out),out, 1), + reshape(zeros(Float32, out), out, 1), p, drop_mask((out*4, in), p), drop_mask((out*4, out), p), true ) - cell.b.data[gate(out, 2)] .= 1 + cell.b[gate(out, 2)] .= 1 return cell end @@ -88,9 +88,12 @@ function (m::WeightDroppedLSTMCell)((h, c), x) return (h′, c), h′ end -Flux.@treelike WeightDroppedLSTMCell +Flux.@functor WeightDroppedLSTMCell -_testmode!(m::WeightDroppedLSTMCell, test) = (m.active = !test) +Flux.trainable(m::WeightDroppedLSTMCell) = (m.Wi, m.Wh, m.b, m.h, m.c) + +testmode!(m::WeightDroppedLSTMCell, mode=true) = + (m.active = (isnothing(mode) || mode == :auto) ? nothing : !mode; m) """ WeightDroppedLSTM(in::Integer, out::Integer, p::Float64=0.0) @@ -106,7 +109,7 @@ julia> wd = WeightDroppedLSTM(4, 5, 0.3); function WeightDroppedLSTM(a...; kw...) cell = WeightDroppedLSTMCell(a...;kw...) 
hidden = (cell.h, cell.c) - return Flux.Recur(cell, hidden, hidden) + return Flux.Recur(cell, hidden) end """ @@ -155,7 +158,9 @@ end AWD_LSTM(in::Integer, out::Integer, p::Float64=0.0; kw...) = AWD_LSTM(WeightDroppedLSTM(in, out, p; kw...), -1, []) -Flux.@treelike AWD_LSTM +Flux.@functor AWD_LSTM + +Flux.trainable(m::AWD_LSTM) = (m.layer,) (m::AWD_LSTM)(in) = m.layer(in) @@ -184,12 +189,12 @@ function asgd_step!(iter::Integer, layer::AWD_LSTM) p = get_trainable_params([layer]) avg_fact = 1/max(iter - layer.T + 1, 1) if avg_fact != 1 - layer.accum = layer.accum .+ Tracker.data.(p) + layer.accum = layer.accum .+ p for (ps, accum) in zip(p, layer.accum) - Tracker.data(ps) .= avg_fact*accum + ps .= avg_fact*accum end else - layer.accum = deepcopy(Tracker.data.(p)) # Accumulator for ASGD + layer.accum = deepcopy(p) # Accumulator for ASGD end end return @@ -230,7 +235,8 @@ function (vd::VarDrop)(x) return (x .* vd.mask) end -_testmode!(vd::VarDrop, test) = (vd.active = !test) +testmode!(m::VarDrop, mode=true) = + (m.active = (isnothing(mode) || mode == :auto) ? nothing : !mode; m) # method for reseting mask of VarDrop reset_masks!(vd::VarDrop) = (vd.reset = true) @@ -270,7 +276,7 @@ end function DroppedEmbeddings(in::Integer, embed_size::Integer, p::Float64=0.0; init = Flux.glorot_uniform) de = DroppedEmbeddings{AbstractArray, typeof(p)}( - param(init(in, embed_size)), + init(in, embed_size), p, drop_mask((in,), p), true @@ -283,9 +289,12 @@ function (de::DroppedEmbeddings)(x::AbstractArray, tying::Bool=false) return tying ? dropped * x : transpose(dropped[x, :]) end -Flux.@treelike DroppedEmbeddings +Flux.@functor DroppedEmbeddings + +Flux.trainable(m::DroppedEmbeddings) = (m.emb) -_testmode!(de::DroppedEmbeddings, test) = (de.active = !test) +testmode!(m::DroppedEmbeddings, mode=true) = + (m.active = (isnothing(mode) || mode == :auto) ? nothing : !mode; m) function reset_masks!(de::DroppedEmbeddings) de.mask = drop_mask(de.mask, de.p) @@ -324,10 +333,10 @@ PooledDense(W, b) = PooledDense(W, b, identity) function PooledDense(hidden_sz::Integer, out::Integer, σ = identity; initW = Flux.glorot_uniform, initb = (dims...) 
-> zeros(Float32, dims...)) -return PooledDense(param(initW(out, hidden_sz*3)), param(initb(out)), σ) +return PooledDense(initW(out, hidden_sz*3), initb(out), σ) end -Flux.@treelike PooledDense +Flux.@functor PooledDense function (a::PooledDense)(x) W, b, σ = a.W, a.b, a.σ diff --git a/src/ULMFiT/data_loaders.jl b/src/ULMFiT/data_loaders.jl index f59e403..839b408 100644 --- a/src/ULMFiT/data_loaders.jl +++ b/src/ULMFiT/data_loaders.jl @@ -27,29 +27,29 @@ function imdb_preprocess(doc::AbstractDocument) length(word) == 1 && return [word] return split(word, symbol) end - text = text(doc) - remove_corrupt_utf8!(text) - remove_case!(text) - prepare!(text, strip_html_tags) - tokens = tokens(text) + text_ = doc + remove_corrupt_utf8!(text_) + remove_case!(text_) + prepare!(text_, strip_html_tags) + tokens_ = tokens(text_) for symbol in [',', '.', '-', '/', "'s"] - tokens = split_word.(tokens, symbol) + tokens_ = split_word.(tokens_, symbol) temp = [] - for token in tokens + for token_ in tokens_ try - append!(temp, put(token, symbol)) + append!(temp, put(token_, symbol)) catch - append!(temp, token) + append!(temp, token_) end end - tokens = temp + tokens_ = temp end - deleteat!(tokens, findall(x -> isequal(x, "")||isequal(x, " "), tokens)) - return tokens + deleteat!(tokens_, findall(x -> isequal(x, "")||isequal(x, " "), tokens_)) + return tokens_ end # Loads WikiText-103 corpus and output a Channel to give a mini-batch at each call -function load_wikitext_103(batchsize::Integer, bptt::Integer; type = "train") +function load_wikitext_103(batchsize::Integer=16, bptt::Integer=70; type = "train") corpuspath = joinpath(datadep"WikiText-103", "wiki.$(type).tokens") corpus = read(open(corpuspath, "r"), String) corpus = tokenize(corpus) @@ -58,13 +58,13 @@ end # IMDB Data loaders for Sentiment Analysis specifically # IMDB data loader for fine-tuning Language Model -function imdb_fine_tune_data(batchsize::Integer, bptt::Integer, num_examples::Integer=50000) +function imdb_fine_tune_data(batchsize::Integer=16, bptt::Integer=70, num_examples::Integer=50000) imdb_dataset = IMDB("train_unsup") dataset = [] - for path in imdb_dataset.filepaths #extract data from the files in directory and put into channel + for path in imdb_dataset.filepaths[1:num_examples] #extract data from the files in directory and put into channel open(path) do fileio cur_text = read(fileio, String) - append!(dataset, imdb_preprocess(cur_text)) + append!(dataset, imdb_preprocess(StringDocument(cur_text))) end #open end #for return Channel(x -> generator(x, dataset; batchsize=batchsize, bptt=bptt)) diff --git a/src/ULMFiT/fine_tune_lm.jl b/src/ULMFiT/fine_tune_lm.jl index 17f33b9..b2e7261 100644 --- a/src/ULMFiT/fine_tune_lm.jl +++ b/src/ULMFiT/fine_tune_lm.jl @@ -24,17 +24,17 @@ opts : `Vector` of optimizers used to update weights for corresponding la NOTE: length(opts) == length(layers) """ -function discriminative_step!(layers, ηL::Float64, l, opts::Vector) +function discriminative_step!(layers, lm::LanguageModel, gen, ηL::Float64, opts::Vector) @assert length(opts) == length(layers) # Gradient calculation - grads = Tracker.gradient(() -> l, get_trainable_params(layers)) + grads = Zygote.gradient(() -> loss(lm, gen), get_trainable_params(layers)) # discriminative step ηl = ηL/(2.6^(length(layers)-1)) for (layer, opt) in zip(layers, opts) opt.eta = ηl for ps in get_trainable_params([layer]) - Tracker.update!(opt, ps, grads[ps]) + Flux.Optimise.update!(opt, ps, grads[ps]) end ηl *= 2.6 end @@ -50,32 +50,28 @@ This function contains main 
training loops for fine-tuning the language model. To use this funciton, an instance of LanguageModel and a data loader is needed. Read the docs for more info about arguments """ -function fine_tune_lm!(lm::LanguageModel, data_loader::Channel=imdb_fine_tune_data, - stlr_cut_frac::Float64=0.1, stlr_ratio::Float32=32, stlr_η_max::Float64=4e-3; +function fine_tune_lm!(lm=LanguageModel(), data_loader=imdb_fine_tune_data, + stlr_cut_frac::Float64=0.1, stlr_ratio::Float32=Float32(32), stlr_η_max::Float64=4e-3; epochs::Integer=1, checkpoint_itvl::Integer=5000) opts = [ADAM(0.001, (0.7, 0.99)) for i=1:4] - cut = num_of_iters * epochs * stlr_cut_frac - + # Fine-Tuning loops for epoch=1:epochs println("\nEpoch: $epoch") - gen = data_loader() - num_of_iters = take!(gen) + gen = data_loader() + num_of_iters = take!(gen) + cut = num_of_iters * epochs * stlr_cut_frac T = num_of_iters-Int(floor((num_of_iters*2)/100)) set_trigger!.(T, lm.layers) for i=1:num_of_iters - - # FORWARD - l = loss(lm, gen) - # Slanted triangular learning rate step t = i + (epoch-1)*num_of_iters p_frac = (i < cut) ? i/cut : (1 - ((i-cut)/(cut*(1/stlr_cut_frac-1)))) ηL = stlr_η_max*((1+p_frac*(stlr_ratio-1))/stlr_ratio) # Backprop with discriminative fine-tuning step - discriminative_step!(lm.layers[[1, 3, 5, 7]], ηL, l, opts) + discriminative_step!(lm.layers[[1, 3, 5, 7]], lm, gen, ηL, opts) # Resets dropout masks for all the layers with DropOut or DropConnect reset_masks!.(lm.layers) @@ -121,7 +117,7 @@ julia> insert!(vocab, 2, "_pad_") function set_vocab!(lm::LanguageModel, vocab::Vector) idxs = indices(vocab, lm.vocab) lm.vocab = vocab - lm.layers[1].emb = param(Tracker.data(lm.layers[1].emb)[idxs, :]) + lm.layers[1].emb = param(lm.layers[1].emb[idxs, :]) lm.layers[1].mask = gpu(drop_mask((length(vocab),), lm.layers[1].p)) return end diff --git a/src/ULMFiT/pretrain_lm.jl b/src/ULMFiT/pretrain_lm.jl index 74bc573..1afd48d 100644 --- a/src/ULMFiT/pretrain_lm.jl +++ b/src/ULMFiT/pretrain_lm.jl @@ -49,7 +49,7 @@ function LanguageModel(load_pretrained::Bool=false, vocabpath::String=joinpath(@ return lm end -Flux.@treelike LanguageModel +Flux.@functor LanguageModel """ test_lm(lm::LanguageModel, data_gen, num_of_iters::Integer; unknown_token::String="_unk_") @@ -63,7 +63,7 @@ It returns loss, accuracy, precsion, recall and F1 score. julia> test_lm(lm, data_gen, 200, " indices(x, lm.vocab, "_unk_"), batch) + batch = gpu(batch) batch = lm.layers.(batch) return batch end @@ -107,11 +108,11 @@ function loss(lm, gen) end # Backpropagation step while training -function backward!(layers, l, opt) +function backward!(layers, lm, gen, opt) # Calulating gradients and weights updation p = get_trainable_params(layers) - grads = Tracker.gradient(() -> l, p) - Tracker.update!(opt, p, grads) + grads = Zygote.gradient(() -> loss(lm, gen), p) + Flux.Optimise.update!(opt, p, grads) return end @@ -138,11 +139,8 @@ function pretrain_lm!(lm::LanguageModel=LanguageModel(), data_loader::Channel=lo set_trigger!.(T, lm.layers) # Setting triggers for AWD_LSTM layers for i=1:num_of_batches - # FORWARD PASS - l = loss(lm, gen) - # REVERSE PASS - backward!(lm.layers, l, opt) + backward!(lm.layers, lm, gen, opt) # ASGD Step, works after Triggering asgd_step!.(i, lm.layers) @@ -158,7 +156,7 @@ end # To save model function save_model!(m::LanguageModel, filepath::String) - weights = cpu.(Tracker.data.(params(m))) + weights = cpu.(params(m)) BSON.@save filepath weights end @@ -182,7 +180,7 @@ SAMPLING... 
""" function sample(starting_text::AbstractDocument, lm::LanguageModel) testmode!(lm.layers) - model_layers = mapleaves(Tracker.data, lm.layers) + model_layers = lm.layers tokens = tokens(starting_text) word_indices = map(x -> indices([x], lm.vocab, "_unk_"), tokens) h = (model_layers.(word_indices))[end] diff --git a/src/ULMFiT/sentiment.jl b/src/ULMFiT/sentiment.jl index c70069d..3ab5479 100644 --- a/src/ULMFiT/sentiment.jl +++ b/src/ULMFiT/sentiment.jl @@ -48,12 +48,12 @@ function BinSentimentClassifier() ) ) Flux.loadparams!(sc, weights) - sc = mapleaves(Tracker.data, sc) + sc = sc Flux.testmode!(sc) return sc end -Flux.@treelike BinSentimentClassifier +Flux.@functor BinSentimentClassifier function (sc::BinSentimentClassifier)(x::TokenDocument) remove_case!(x) diff --git a/src/ULMFiT/train_text_classifier.jl b/src/ULMFiT/train_text_classifier.jl index e30912f..702bd21 100644 --- a/src/ULMFiT/train_text_classifier.jl +++ b/src/ULMFiT/train_text_classifier.jl @@ -30,7 +30,7 @@ function TextClassifier(lm::LanguageModel=LanguageModel(), clsfr_out_sz::Integer ) end -Flux.@treelike TextClassifier +Flux.@functor TextClassifier """ Cross Validate @@ -48,7 +48,7 @@ gen will be used for validation """ function validate(tc::TextClassifier, gen::Channel, num_of_batches::Union{Colon, Integer}) n_classes = size(tc.linear_layers[end-2].W, 1) - classifier = mapleaves(Tracker.data, tc) + classifier = tc Flux.testmode!(classifier) loss = 0 iters = take!(gen) @@ -91,15 +91,17 @@ tracked_steps : This is the number of tracked time-steps for Truncated Backpro """ function forward(tc::TextClassifier, gen::Channel, tracked_steps::Integer=32) # swiching off tracking - classifier = mapleaves(Tracker.data, tc) + classifier = tc X = take!(gen) l = length(X) # Truncated Backprop through time - for i=1:ceil(l/now_per_pass)-1 # Tracking is swiched off inside this loop - (i == 1 && l%now_per_pass != 0) ? (last_idx = l%now_per_pass) : (last_idx = now_per_pass) - H = broadcast(x -> indices(x, classifier.vocab, "_unk_"), X[1:last_idx]) - H = classifier.rnn_layers.(H) - X = X[last_idx+1:end] + Zygote.ignore() do + for i=1:ceil(l/tracked_steps)-1 # Tracking is swiched off inside this loop + (i == 1 && l%tracked_steps != 0) ? 
(last_idx = l%tracked_steps) : (last_idx = tracked_steps) + H = broadcast(x -> indices(x, classifier.vocab, "_unk_"), X[1:last_idx]) + H = classifier.rnn_layers.(H) + X = X[last_idx+1:end] + end end # set the lated hidden states to original model for (t_layer, unt_layer) in zip(tc.rnn_layers[2:end], classifier.rnn_layers[2:end]) @@ -130,7 +132,7 @@ Arguments: classifier : Instance of TextClassifier gen : 'Channel' [data loader], to give a mini-batch -tracked_words : specifies the number of time-steps for which tracking is on +tracked_steps : specifies the number of time-steps for which tracking is on """ function loss(classifier::TextClassifier, gen::Channel, tracked_steps::Integer=32) H = forward(classifier, gen, tracked_steps) @@ -140,6 +142,23 @@ function loss(classifier::TextClassifier, gen::Channel, tracked_steps::Integer=3 return l end +function discriminative_step!(layers, classifier::TextClassifier, gen::Channel, tracked_steps::Integer, ηL::Float64, opts::Vector) + @assert length(opts) == length(layers) + # Gradient calculation + grads = Zygote.gradient(() -> loss(classifier, gen, tracked_steps = tracked_steps), get_trainable_params(layers)) + + # discriminative step + ηl = ηL/(2.6^(length(layers)-1)) + for (layer, opt) in zip(layers, opts) + opt.eta = ηl + for ps in get_trainable_params([layer]) + Flux.Optimise.update!(opt, ps, grads[ps]) + end + ηl *= 2.6 + end + return +end + """ train_classifier!(classifier::TextClassifier=TextClassifier(), classes::Integer=1, data_loader::Channel=imdb_classifier_data, hidden_layer_size::Integer=50;kw...) @@ -151,7 +170,7 @@ function train_classifier!(classifier::TextClassifier=TextClassifier(), classes: data_loader::Channel=imdb_classifier_data, hidden_layer_size::Integer=50; stlr_cut_frac::Float64=0.1, stlr_ratio::Number=32, stlr_η_max::Float64=0.01, val_loader::Channel=nothing, cross_val_batches::Union{Colon, Integer}=:, - epochs::Integer=1, checkpoint_itvl=5000) + epochs::Integer=1, checkpoint_itvl=5000, tracked_steps::Integer=32) trainable = [] append!(trainable, [classifier.rnn_layers[[1, 3, 5, 7]]...]) @@ -166,7 +185,6 @@ function train_classifier!(classifier::TextClassifier=TextClassifier(), classes: num_of_iters = take!(gen) cut = num_of_iters * epochs * stlr_cut_frac for iter=1:num_of_iters - l = loss(classifier, gen, now_per_pass = now_per_pass) # Slanted triangular learning rates t = iter + (epoch-1)*num_of_iters @@ -175,7 +193,7 @@ function train_classifier!(classifier::TextClassifier=TextClassifier(), classes: # Gradual-unfreezing Step with discriminative fine-tuning unfreezed_layers, cur_opts = (epoch < length(trainable)) ? 
(trainable[end-epoch+1:end], opts[end-epoch+1:end]) : (trainable, opts) - discriminative_step!(unfreezed_layers, ηL, l, cur_opts) + discriminative_step!(unfreezed_layers, classifier, gen, tracked_steps,ηL, cur_opts) reset_masks!.(classifier.rnn_layers) # reset all dropout masks end @@ -203,13 +221,13 @@ All the preprocessing related to the used vocabulary should be done before using Use `prepare!` function to do preprocessing """ function predict(tc::TextClassifier, text_sents::Corpus) - classifier = mapleaves(Tracker.data, tc) + classifier = tc Flux.testmode!(classifier) predictions = [] expr(x) = indices(x, classifier.vocab, "_unk_") for text in text_sents - tokens = tokens(text) - h = classifier.rnn_layers.(expr.(tokens)) + tokens_ = tokens(text) + h = classifier.rnn_layers.(expr.(tokens_)) probability_dist = classifier.linear_layers(h) class = argmax(probaility_dist) push!(predictions, class) diff --git a/src/ULMFiT/utils.jl b/src/ULMFiT/utils.jl index 691354f..64bfd11 100644 --- a/src/ULMFiT/utils.jl +++ b/src/ULMFiT/utils.jl @@ -27,8 +27,8 @@ end init_weights(extreme::AbstractFloat, dims...) = randn(Float32, dims...) .* sqrt(Float32(extreme)) # Generator, whenever it should be called two times since it gives X in first and y in second call -function generator(c::Channel, corpus::AbstractDocument; batchsize::Integer=64, bptt::Integer=70) - X_total = post_pad_sequences(chunk(tokens(corpus), batchsize)) +function generator(c::Channel, corpus; batchsize::Integer=64, bptt::Integer=70) + X_total = post_pad_sequences(Flux.chunk(corpus, batchsize)) n_batches = Int(floor(length(X_total[1])/bptt)) put!(c, n_batches) for i=1:n_batches diff --git a/test/ulmfit.jl b/test/ulmfit.jl index 8ea0092..b3820f9 100644 --- a/test/ulmfit.jl +++ b/test/ulmfit.jl @@ -4,7 +4,7 @@ using BSON @testset "Custom layers" begin @testset "WeightDroppedLSTM" begin wd = ULMFiT.WeightDroppedLSTM(4, 5, 0.3) - @test all(wd.init .== wd.state) + @test all((wd.cell.h, wd.cell.c) .== wd.state) @test size(wd.cell.Wi) == size(wd.cell.maskWi) @test size(wd.cell.Wh) == size(wd.cell.maskWh) @test wd.cell.active @@ -31,10 +31,10 @@ using BSON ULMFiT.asgd_step!(4, awd) @test length(awd.accum) == 3 temp = deepcopy(awd.accum[1][1]) - @test temp == Tracker.data(awd.layer.cell.Wi[1]) + @test temp == awd.layer.cell.Wi[1] ULMFiT.asgd_step!(5, awd) temp += temp - @test temp == Tracker.data(awd.accum[1][1]) + @test temp == awd.accum[1][1] @test length(params(awd)) == 5 end From 95550b9d6af45e451d0b916eb9d1d38abc1190e8 Mon Sep 17 00:00:00 2001 From: Adarshkumar712 Date: Sat, 1 May 2021 04:45:15 -0700 Subject: [PATCH 08/23] change truncate! to reset! 
--- src/TextModels.jl | 1 + src/ULMFiT/custom_layers.jl | 13 ++++++------- src/ULMFiT/pretrain_lm.jl | 4 ++-- src/ULMFiT/train_text_classifier.jl | 2 +- test/runtests.jl | 10 +++++----- test/ulmfit.jl | 2 +- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/TextModels.jl b/src/TextModels.jl index f437cb1..9f8b5ca 100644 --- a/src/TextModels.jl +++ b/src/TextModels.jl @@ -43,6 +43,7 @@ module TextModels using TextAnalysis using DataDeps using Flux + using Flux:crossentropy using Zygote using BSON using CorpusLoaders diff --git a/src/ULMFiT/custom_layers.jl b/src/ULMFiT/custom_layers.jl index ad6e906..a07fd4a 100644 --- a/src/ULMFiT/custom_layers.jl +++ b/src/ULMFiT/custom_layers.jl @@ -48,8 +48,7 @@ mutable struct WeightDroppedLSTMCell{A, V, S, M} Wi::A Wh::A b::V - h::S - c::S + state0::S p::Float64 maskWi::M maskWh::M @@ -63,8 +62,8 @@ function WeightDroppedLSTMCell(in::Integer, out::Integer, p::Float64=0.0; init(out*4, in), init(out*4, out), init(out*4), - reshape(zeros(Float32, out),out, 1), - reshape(zeros(Float32, out), out, 1), + (reshape(zeros(Float32, out),out, 1), + reshape(zeros(Float32, out), out, 1)), p, drop_mask((out*4, in), p), drop_mask((out*4, out), p), @@ -90,7 +89,7 @@ end Flux.@functor WeightDroppedLSTMCell -Flux.trainable(m::WeightDroppedLSTMCell) = (m.Wi, m.Wh, m.b, m.h, m.c) +Flux.trainable(m::WeightDroppedLSTMCell) = (m.Wi, m.Wh, m.b, m.state0...) testmode!(m::WeightDroppedLSTMCell, mode=true) = (m.active = (isnothing(mode) || mode == :auto) ? nothing : !mode; m) @@ -108,7 +107,7 @@ julia> wd = WeightDroppedLSTM(4, 5, 0.3); """ function WeightDroppedLSTM(a...; kw...) cell = WeightDroppedLSTMCell(a...;kw...) - hidden = (cell.h, cell.c) + hidden = cell.state0 return Flux.Recur(cell, hidden) end @@ -291,7 +290,7 @@ end Flux.@functor DroppedEmbeddings -Flux.trainable(m::DroppedEmbeddings) = (m.emb) +Flux.trainable(m::DroppedEmbeddings) = (m.emb,) testmode!(m::DroppedEmbeddings, mode=true) = (m.active = (isnothing(mode) || mode == :auto) ? 
nothing : !mode; m) diff --git a/src/ULMFiT/pretrain_lm.jl b/src/ULMFiT/pretrain_lm.jl index 1afd48d..6f5ca3a 100644 --- a/src/ULMFiT/pretrain_lm.jl +++ b/src/ULMFiT/pretrain_lm.jl @@ -102,8 +102,8 @@ end function loss(lm, gen) H = forward(lm, take!(gen)) Y = broadcast(x -> gpu(Flux.onehotbatch(x, lm.vocab, "_unk_")), take!(gen)) - l = sum(crossentropy.(H, Y)) - Flux.truncate!(lm.layers) + l = sum(Flux.crossentropy.(H, Y)) + Flux.reset!(lm.layers) return l end diff --git a/src/ULMFiT/train_text_classifier.jl b/src/ULMFiT/train_text_classifier.jl index 702bd21..2530f67 100644 --- a/src/ULMFiT/train_text_classifier.jl +++ b/src/ULMFiT/train_text_classifier.jl @@ -20,7 +20,7 @@ function TextClassifier(lm::LanguageModel=LanguageModel(), clsfr_out_sz::Integer lm.vocab, lm.layers[1:8], Chain( - gpu(PooledDense(length(lm.layers[7].layer.cell.h), clsfr_hidden_sz)), + gpu(PooledDense(length(lm.layers[7].layer.cell.state0[1]), clsfr_hidden_sz)), gpu(BatchNorm(clsfr_hidden_sz, relu)), Dropout(clsfr_hidden_drop), gpu(Dense(clsfr_hidden_sz, clsfr_out_sz)), diff --git a/test/runtests.jl b/test/runtests.jl index 1221b31..dae92ff 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,8 +5,8 @@ using TextModels println("Running tests:") include("crf.jl") -include("ner.jl") -include("pos.jl") -include("averagePerceptronTagger.jl") -#include("ulmfit.jl") -include("sentiment.jl") +#include("ner.jl") +#include("pos.jl") +#include("averagePerceptronTagger.jl") +include("ulmfit.jl") +#include("sentiment.jl") diff --git a/test/ulmfit.jl b/test/ulmfit.jl index b3820f9..cdd8fdd 100644 --- a/test/ulmfit.jl +++ b/test/ulmfit.jl @@ -4,7 +4,7 @@ using BSON @testset "Custom layers" begin @testset "WeightDroppedLSTM" begin wd = ULMFiT.WeightDroppedLSTM(4, 5, 0.3) - @test all((wd.cell.h, wd.cell.c) .== wd.state) + @test all((wd.cell.state0) .== wd.state) @test size(wd.cell.Wi) == size(wd.cell.maskWi) @test size(wd.cell.Wh) == size(wd.cell.maskWh) @test wd.cell.active From 0628c2df320bf39b380c880b307f8e2c23cde288 Mon Sep 17 00:00:00 2001 From: Adarshkumar712 Date: Sat, 1 May 2021 07:51:28 -0700 Subject: [PATCH 09/23] Updated and verified tests --- src/ULMFiT/custom_layers.jl | 21 ++++++++++++++++----- src/ULMFiT/pretrain_lm.jl | 2 +- src/ULMFiT/train_text_classifier.jl | 2 +- test/crf.jl | 6 +++--- test/runtests.jl | 7 +++---- test/ulmfit.jl | 2 +- 6 files changed, 25 insertions(+), 15 deletions(-) diff --git a/src/ULMFiT/custom_layers.jl b/src/ULMFiT/custom_layers.jl index a07fd4a..c0275a6 100644 --- a/src/ULMFiT/custom_layers.jl +++ b/src/ULMFiT/custom_layers.jl @@ -48,7 +48,8 @@ mutable struct WeightDroppedLSTMCell{A, V, S, M} Wi::A Wh::A b::V - state0::S + h::S + c::S p::Float64 maskWi::M maskWh::M @@ -62,8 +63,8 @@ function WeightDroppedLSTMCell(in::Integer, out::Integer, p::Float64=0.0; init(out*4, in), init(out*4, out), init(out*4), - (reshape(zeros(Float32, out),out, 1), - reshape(zeros(Float32, out), out, 1)), + reshape(zeros(Float32, out),out, 1), + reshape(zeros(Float32, out), out, 1), p, drop_mask((out*4, in), p), drop_mask((out*4, out), p), @@ -89,7 +90,7 @@ end Flux.@functor WeightDroppedLSTMCell -Flux.trainable(m::WeightDroppedLSTMCell) = (m.Wi, m.Wh, m.b, m.state0...) +Flux.trainable(m::WeightDroppedLSTMCell) = (m.Wi, m.Wh, m.b, m.h, m.c) testmode!(m::WeightDroppedLSTMCell, mode=true) = (m.active = (isnothing(mode) || mode == :auto) ? nothing : !mode; m) @@ -107,10 +108,20 @@ julia> wd = WeightDroppedLSTM(4, 5, 0.3); """ function WeightDroppedLSTM(a...; kw...) cell = WeightDroppedLSTMCell(a...;kw...) 
- hidden = cell.state0 + hidden = (cell.h, cell.c) return Flux.Recur(cell, hidden) end +# over definition for reset! to work with pretrained model +function reset!(m) + try + (m.state = (m.cell.h, m.cell.c)) + catch + Flux.reset!(m) + end +end + + """ reset_masks!(layer) diff --git a/src/ULMFiT/pretrain_lm.jl b/src/ULMFiT/pretrain_lm.jl index 6f5ca3a..1a59112 100644 --- a/src/ULMFiT/pretrain_lm.jl +++ b/src/ULMFiT/pretrain_lm.jl @@ -103,7 +103,7 @@ function loss(lm, gen) H = forward(lm, take!(gen)) Y = broadcast(x -> gpu(Flux.onehotbatch(x, lm.vocab, "_unk_")), take!(gen)) l = sum(Flux.crossentropy.(H, Y)) - Flux.reset!(lm.layers) + reset!(lm.layers) return l end diff --git a/src/ULMFiT/train_text_classifier.jl b/src/ULMFiT/train_text_classifier.jl index 2530f67..702bd21 100644 --- a/src/ULMFiT/train_text_classifier.jl +++ b/src/ULMFiT/train_text_classifier.jl @@ -20,7 +20,7 @@ function TextClassifier(lm::LanguageModel=LanguageModel(), clsfr_out_sz::Integer lm.vocab, lm.layers[1:8], Chain( - gpu(PooledDense(length(lm.layers[7].layer.cell.state0[1]), clsfr_hidden_sz)), + gpu(PooledDense(length(lm.layers[7].layer.cell.h), clsfr_hidden_sz)), gpu(BatchNorm(clsfr_hidden_sz, relu)), Dropout(clsfr_hidden_drop), gpu(Dense(clsfr_hidden_sz, clsfr_out_sz)), diff --git a/test/crf.jl b/test/crf.jl index d88e32e..3f9246a 100644 --- a/test/crf.jl +++ b/test/crf.jl @@ -118,14 +118,14 @@ using TextModels: score_sequence, forward_score function train() for d in data - reset!(lstm) + Flux.reset!(lstm) grads = gradient(() -> loss(d[1], d[2]), ps) Flux.Optimise.update!(opt, ps, grads) end end function find_loss(d) - reset!(lstm) + Flux.reset!(lstm) loss(d[1], d[2]) end to_sum = [find_loss(d) for d in data] @@ -138,7 +138,7 @@ using TextModels: score_sequence, forward_score train() end - dense_param_2 = deepcopy(d_out.W)) + dense_param_2 = deepcopy(d_out.W) lstm_param_2 = deepcopy(lstm.cell.Wh) crf_param_2 = deepcopy(c.W) l2 = sum([find_loss(d) for d in data]) diff --git a/test/runtests.jl b/test/runtests.jl index dae92ff..f28f076 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,8 +5,7 @@ using TextModels println("Running tests:") include("crf.jl") -#include("ner.jl") -#include("pos.jl") -#include("averagePerceptronTagger.jl") +include("ner.jl") +include("pos.jl") +include("averagePerceptronTagger.jl") include("ulmfit.jl") -#include("sentiment.jl") diff --git a/test/ulmfit.jl b/test/ulmfit.jl index cdd8fdd..b3820f9 100644 --- a/test/ulmfit.jl +++ b/test/ulmfit.jl @@ -4,7 +4,7 @@ using BSON @testset "Custom layers" begin @testset "WeightDroppedLSTM" begin wd = ULMFiT.WeightDroppedLSTM(4, 5, 0.3) - @test all((wd.cell.state0) .== wd.state) + @test all((wd.cell.h, wd.cell.c) .== wd.state) @test size(wd.cell.Wi) == size(wd.cell.maskWi) @test size(wd.cell.Wh) == size(wd.cell.maskWh) @test wd.cell.active From 94edf5f90b0a111380409c284c7cac2e120b59ea Mon Sep 17 00:00:00 2001 From: Adarshkumar712 Date: Tue, 18 May 2021 05:05:59 -0700 Subject: [PATCH 10/23] Update CI build to 1.6 --- .github/workflows/ci.yml | 11 +++++------ .travis.yml | 7 +++---- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 67b437d..8e04faf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,8 +13,7 @@ jobs: strategy: matrix: version: - - '1.3' - - '1' + - '1.6' - 'nightly' os: - ubuntu-latest @@ -29,12 +28,12 @@ jobs: # MacOS not available on x86 - {os: 'macOS-latest', arch: 'x86'} # Don't test on all versions - - {os: 'macOS-latest', version: 
'1.3'} + - {os: 'macOS-latest', version: '1.6'} - {os: 'macOS-latest', version: 'nightly'} - - {os: 'windows-latest', version: '1.3'} + - {os: 'windows-latest', version: '1.6'} - {os: 'windows-latest', version: 'nightly'} - {os: 'windows-latest', arch: 'x86'} - - {arch: 'x86', version: '1.3'} + - {arch: 'x86', version: '1.6'} - {arch: 'x86', version: 'nightly'} steps: - uses: actions/checkout@v1 @@ -55,7 +54,7 @@ jobs: - uses: actions/checkout@v1 - uses: julia-actions/setup-julia@latest with: - version: '1.5' + version: '1.6' - run: julia --project=docs -e ' using Pkg; Pkg.develop(PackageSpec(; path=pwd())); diff --git a/.travis.yml b/.travis.yml index bf028c9..8e8320a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,17 +6,16 @@ os: env: - DATADEPS_ALWAYS_ACCEPT=true julia: - - 1.3 - - 1 + - 1.6 - nightly matrix: allow_failures: - julia: nightly exclude: - os: osx - julia: 1.3 + julia: 1.6 - os: windows - julia: 1.3 + julia: 1.6 - os: osx julia: nightly - os: windows From 8398f61d158585426fe962f1915ef0576a6eaec2 Mon Sep 17 00:00:00 2001 From: Adarshkumar712 Date: Fri, 21 May 2021 04:03:27 -0700 Subject: [PATCH 11/23] Update CorpusLoaders version --- Project.toml | 2 +- src/TextModels.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index df1e03f..e8a08dd 100644 --- a/Project.toml +++ b/Project.toml @@ -15,6 +15,7 @@ Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" Languages = "8ef0a80b-9436-5d2c-a485-80b904378c43" NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" +Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" @@ -24,7 +25,6 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] BSON = "0.3.3" -DataDeps = "0.7.7" DataStructures = "0.18.9" Flux = "0.12.2" JSON = "0.21.1" diff --git a/src/TextModels.jl b/src/TextModels.jl index 9f8b5ca..5c88496 100644 --- a/src/TextModels.jl +++ b/src/TextModels.jl @@ -41,12 +41,12 @@ module TextModels # ULMFiT module ULMFiT using TextAnalysis - using DataDeps using Flux using Flux:crossentropy using Zygote using BSON using CorpusLoaders + using DataDeps include("ULMFiT/utils.jl") include("ULMFiT/datadeps.jl") include("ULMFiT/data_loaders.jl") From 6cd9824188b96d7c35c8c093eb916ddcfb49badd Mon Sep 17 00:00:00 2001 From: Adarshkumar712 Date: Fri, 21 May 2021 05:08:55 -0700 Subject: [PATCH 12/23] Reshape pretrained weights ULMFiT LM --- src/ULMFiT/pretrain_lm.jl | 5 +++++ test/ulmfit.jl | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/src/ULMFiT/pretrain_lm.jl b/src/ULMFiT/pretrain_lm.jl index 1a59112..e659f8e 100644 --- a/src/ULMFiT/pretrain_lm.jl +++ b/src/ULMFiT/pretrain_lm.jl @@ -163,6 +163,11 @@ end # To load model function load_model!(lm::LanguageModel, filepath::String) BSON.@load filepath weights + # reshape saved weights to match Recurr (h, c) shape + layers = [5, 6, 10, 11, 15, 16] + for l in layers + weights[l] = reshape(weights[l], length(weights[l]), 1) + end Flux.loadparams!(lm, weights) end diff --git a/test/ulmfit.jl b/test/ulmfit.jl index b3820f9..3deca62 100644 --- a/test/ulmfit.jl +++ b/test/ulmfit.jl @@ -95,6 +95,12 @@ end @test length(ULMFiT.get_trainable_params(lm.layers)) == 10 pretrained_weights = BSON.load(datadep"Pretrained ULMFiT Language Model/ulmfit_lm_en.bson") + # reshape weights of (h, c) + layers = [5, 6, 10, 11, 15, 16] + for i in layers + pretrained_weights[:weights][i] = 
reshape(pretrained_weights[:weights][i], length(pretrained_weights[:weights][i]), 1) + end + @test length(pretrained_weights[:weights]) == 16 @test all(size.(params(lm)) .== size.(pretrained_weights[:weights])) end From 47015785632dc881a079794b1f1456d585b7e401 Mon Sep 17 00:00:00 2001 From: Adarshkumar712 Date: Fri, 21 May 2021 09:25:03 -0700 Subject: [PATCH 13/23] Update crf test --- src/CRF/loss.jl | 2 +- test/crf.jl | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/CRF/loss.jl b/src/CRF/loss.jl index c1405fd..32501bd 100644 --- a/src/CRF/loss.jl +++ b/src/CRF/loss.jl @@ -5,7 +5,7 @@ Compute the Normalization / partition function or the Forward Algorithm score - `Z` """ function forward_score(c::CRF, x, init_α) - forward_var = log_sum_exp((c.W .+ (x[1]') .+ init_α)) + forward_var = log_sum_exp(c.W .+ x[1]' .+ init_α) for i in 2:length(x) forward_var = log_sum_exp((c.W .+ x[i]') .+ forward_var') diff --git a/test/crf.jl b/test/crf.jl index 3f9246a..a548a4b 100644 --- a/test/crf.jl +++ b/test/crf.jl @@ -1,6 +1,5 @@ using Flux -using Flux: LSTM, Dense, reset!, onehot, RNN -using Zygote: gradient +using Flux: gradient, LSTM, Dense, reset!, onehot, RNN, params using TextModels: score_sequence, forward_score @testset "crf" begin @@ -109,7 +108,7 @@ using TextModels: score_sequence, forward_score init_α = fill(-10000, (c.n + 2, 1)) init_α[c.n + 1] = 0 - loss(xs, ys) = crf_loss(c, m(xs), ys, init_α) + loss(xs, ys) = crf_loss(c, m(xs), ys, init_α) + 1e-4*sum(c.W.*c.W) opt = Descent(0.01) data = zip(X, Y) @@ -149,3 +148,4 @@ using TextModels: score_sequence, forward_score @test crf_param_1 != crf_param_2 end end + From 2251a40697abbe8d686798fe2bae02232b3d2fe5 Mon Sep 17 00:00:00 2001 From: Adarshkumar712 Date: Mon, 24 May 2021 05:18:15 -0700 Subject: [PATCH 14/23] Move docs from TextAnalysis --- docs/make.jl | 4 +- docs/src/sentiment.md | 41 ++++++++ docs/src/tagging.md | 237 ++++++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 1 + 4 files changed, 282 insertions(+), 1 deletion(-) create mode 100644 docs/src/sentiment.md create mode 100644 docs/src/tagging.md diff --git a/docs/make.jl b/docs/make.jl index 5876f79..a2f72fd 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -8,8 +8,10 @@ makedocs( pages = [ "Home" => "index.md", "Conditional Random Fields" => "crf.md", - "Named Entity Recognition" => "ner.md", "ULMFiT" => "ULMFiT.md", + "Named Entity Recognition" => "ner.md", + "Tagging Schemes" => "tagging.md.md", + "Sentiment Analyzer" => "sentiment.md", "API References" => "APIReference.md" ], ) diff --git a/docs/src/sentiment.md b/docs/src/sentiment.md new file mode 100644 index 0000000..e2cfe57 --- /dev/null +++ b/docs/src/sentiment.md @@ -0,0 +1,41 @@ +## Sentiment Analyzer + +It can be used to find the sentiment score (between 0 and 1) of a word, sentence or a Document. +A trained model (using Flux) on IMDB word corpus with weights saved are used to calculate the sentiments. + + model = SentimentAnalyzer() + model(doc) + model(doc, handle_unknown) + +* doc = Input Document for calculating document (AbstractDocument type) +* handle_unknown = A function for handling unknown words. 
Should return an array (default (x)->[]) + +```julia +julia> using TextAnalysis + +julia> m = SentimentAnalyzer() +Sentiment Analysis Model Trained on IMDB with a 88587 word corpus + +julia> d1 = StringDocument("a very nice thing that everyone likes") +A StringDocument{String} + * Language: Languages.English() + * Title: Untitled Document + * Author: Unknown Author + * Timestamp: Unknown Time + * Snippet: a very nice thing that everyone likes + +julia> m(d1) +0.5183109f0 + +julia> d = StringDocument("a horrible thing that everyone hates") +A StringDocument{String} + * Language: Languages.English() + * Title: Untitled Document + * Author: Unknown Author + * Timestamp: Unknown Time + * Snippet: a horrible thing that everyone hates + +julia> m(d2) +0.47193584f0 + +``` diff --git a/docs/src/tagging.md b/docs/src/tagging.md new file mode 100644 index 0000000..90d85cf --- /dev/null +++ b/docs/src/tagging.md @@ -0,0 +1,237 @@ +## Tagging_schemes + +There are many tagging schemes used for sequence labelling. +TextAnalysis currently offers functions for conversion between these tagging format. + +* BIO1 +* BIO2 +* BIOES + +```julia +julia> tags = ["I-LOC", "O", "I-PER", "B-MISC", "I-MISC", "B-PER", "I-PER", "I-PER"] + +julia> tag_scheme!(tags, "BIO1", "BIOES") + +julia> tags +8-element Array{String,1}: + "S-LOC" + "O" + "S-PER" + "B-MISC" + "E-MISC" + "B-PER" + "I-PER" + "E-PER" +``` + +## Parts of Speech Tagging + +This package provides with two different Part of Speech Tagger. + +## Average Perceptron Part of Speech Tagger + +This tagger can be used to find the POS tag of a word or token in a given sentence. It is a based on `Average Perceptron Algorithm`. +The model can be trained from scratch and weights are saved in specified location. +The pretrained model can also be loaded and can be used directly to predict tags. 
+ +### To train model: +```julia +julia> tagger = PerceptronTagger(false) #we can use tagger = PerceptronTagger() +julia> fit!(tagger, [[("today","NN"),("is","VBZ"),("good","JJ"),("day","NN")]]) +iteration : 1 +iteration : 2 +iteration : 3 +iteration : 4 +iteration : 5 +``` + +### To load pretrained model: +```julia +julia> tagger = PerceptronTagger(true) +loaded successfully +PerceptronTagger(AveragePerceptron(Set(Any["JJS", "NNP_VBZ", "NN_NNS", "CC", "NNP_NNS", "EX", "NNP_TO", "VBD_DT", "LS", ("Council", "NNP") … "NNPS", "NNP_LS", "VB", "NNS_NN", "NNP_SYM", "VBZ", "VBZ_JJ", "UH", "SYM", "NNP_NN", "CD"]), Dict{Any,Any}("i+2 word wetlands"=>Dict{Any,Any}("NNS"=>0.0,"JJ"=>0.0,"NN"=>0.0),"i-1 tag+i word NNP basic"=>Dict{Any,Any}("JJ"=>0.0,"IN"=>0.0),"i-1 tag+i word DT chloride"=>Dict{Any,Any}("JJ"=>0.0,"NN"=>0.0),"i-1 tag+i word NN choo"=>Dict{Any,Any}("NNP"=>0.0,"NN"=>0.0),"i+1 word antarctica"=>Dict{Any,Any}("FW"=>0.0,"NN"=>0.0),"i-1 tag+i word -START- appendix"=>Dict{Any,Any}("NNP"=>0.0,"NNPS"=>0.0,"NN"=>0.0),"i-1 word wahoo"=>Dict{Any,Any}("JJ"=>0.0,"VBD"=>0.0),"i-1 tag+i word DT children's"=>Dict{Any,Any}("NNS"=>0.0,"NN"=>0.0),"i word dnipropetrovsk"=>Dict{Any,Any}("NNP"=>0.003,"NN"=>-0.003),"i suffix hla"=>Dict{Any,Any}("JJ"=>0.0,"NN"=>0.0)…), DefaultDict{Any,Any,Int64}(), DefaultDict{Any,Any,Int64}(), 1, ["-START-", "-START2-"]), Dict{Any,Any}("is"=>"VBZ","at"=>"IN","a"=>"DT","and"=>"CC","for"=>"IN","by"=>"IN","Retrieved"=>"VBN","was"=>"VBD","He"=>"PRP","in"=>"IN"…), Set(Any["JJS", "NNP_VBZ", "NN_NNS", "CC", "NNP_NNS", "EX", "NNP_TO", "VBD_DT", "LS", ("Council", "NNP") … "NNPS", "NNP_LS", "VB", "NNS_NN", "NNP_SYM", "VBZ", "VBZ_JJ", "UH", "SYM", "NNP_NN", "CD"]), ["-START-", "-START2-"], ["-END-", "-END2-"], Any[]) +``` + +### To predict tags: + +The perceptron tagger can predict tags over various document types- + + predict(tagger, sentence::String) + predict(tagger, Tokens::Array{String, 1}) + predict(tagger, sd::StringDocument) + predict(tagger, fd::FileDocument) + predict(tagger, td::TokenDocument) + +This can also be done by - + tagger(input) + + +```julia +julia> predict(tagger, ["today", "is"]) +2-element Array{Any,1}: + ("today", "NN") + ("is", "VBZ") + +julia> tagger(["today", "is"]) +2-element Array{Any,1}: + ("today", "NN") + ("is", "VBZ") +``` + +`PerceptronTagger(load::Bool)` + +* load = Boolean argument if `true` then pretrained model is loaded + +`fit!(self::PerceptronTagger, sentences::Vector{Vector{Tuple{String, String}}}, save_loc::String, nr_iter::Integer)` + +* self = `PerceptronTagger` object +* sentences = `Vector` of `Vector` of `Tuple` of pair of word or token and its POS tag [see above example] +* save_loc = location of file to save the trained weights +* nr_iter = Number of iterations to pass the `sentences` to train the model ( default 5) + +`predict(self::PerceptronTagger, tokens)` + +* self = PerceptronTagger +* tokens = `Vector` of words or tokens for which to predict tags + +## Neural Model for Part of Speech tagging using LSTMs, CNN and CRF + +The API provided is a pretrained model for tagging Part of Speech. +The current model tags all the POS Tagging is done based on [convention used in Penn Treebank](https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html), with 36 different Part of Speech tags excludes punctuation. + +To use the API, we first load the model weights into an instance of tagger. 
+The function also accepts the paths of the model weights and model dicts (for character and word embeddings):
+
+    PoSTagger()
+    PoSTagger(dicts_path, weights_path)
+
+```julia
+julia> pos = PoSTagger()
+
+```
+
+!!! note
+    When you call `PoSTagger()` for the first time, the package will request permission to download the `Model_dicts` and `Model_weights`. Upon downloading, these are stored locally and managed by `DataDeps`. So, on subsequent uses the weights will not need to be downloaded again.
+
+Once we create an instance, we can call it to tag a String (sentence), a sequence of tokens, an `AbstractDocument` or a `Corpus`.
+
+    (pos::PoSTagger)(sentence::String)
+    (pos::PoSTagger)(tokens::Array{String, 1})
+    (pos::PoSTagger)(sd::StringDocument)
+    (pos::PoSTagger)(fd::FileDocument)
+    (pos::PoSTagger)(td::TokenDocument)
+    (pos::PoSTagger)(crps::Corpus)
+
+```julia
+
+julia> sentence = "This package is maintained by John Doe."
+"This package is maintained by John Doe."
+
+julia> tags = pos(sentence)
+8-element Array{String,1}:
+ "DT"
+ "NN"
+ "VBZ"
+ "VBN"
+ "IN"
+ "NNP"
+ "NNP"
+ "."
+
+```
+
+The API tokenizes the input sentences via the default tokenizer provided by `WordTokenizers`, which is currently set to the multilingual `TokTok Tokenizer`.
+
+```julia
+
+julia> using WordTokenizers
+
+julia> collect(zip(WordTokenizers.tokenize(sentence), tags))
+8-element Array{Tuple{String,String},1}:
+ ("This", "DT")
+ ("package", "NN")
+ ("is", "VBZ")
+ ("maintained", "VBN")
+ ("by", "IN")
+ ("John", "NNP")
+ ("Doe", "NNP")
+ (".", ".")
+
+```
+
+For tagging a multi-sentence text or document, one can use `split_sentences` from the `WordTokenizers.jl` package and run the POS model on each sentence.
+
+```julia
+julia> sentences = "Rabinov is winding up his term as ambassador. He will be replaced by Eliahu Ben-Elissar, a former Israeli envoy to Egypt and right-wing Likud party politician." # Sentence taken from CoNLL 2003 Dataset
+
+julia> splitted_sents = WordTokenizers.split_sentences(sentences)
+
+julia> tag_sequences = pos.(splitted_sents)
+2-element Array{Array{String,1},1}:
+ ["NNP", "VBZ", "VBG", "RP", "PRP\$", "NN", "IN", "NN", "."]
+ ["PRP", "MD", "VB", "VBN", "IN", "NNP", "NNP", ",", "DT", "JJ", "JJ", "NN", "TO", "NNP", "CC", "JJ", "NNP", "NNP", "NNP", "."]
+
+julia> zipped = [collect(zip(tag_sequences[i], WordTokenizers.tokenize(splitted_sents[i]))) for i in eachindex(splitted_sents)]
+
+julia> zipped[1]
+9-element Array{Tuple{String,String},1}:
+ ("NNP", "Rabinov")
+ ("VBZ", "is")
+ ("VBG", "winding")
+ ("RP", "up")
+ ("PRP\$", "his")
+ ("NN", "term")
+ ("IN", "as")
+ ("NN", "ambassador")
+ (".", ".")
+
+julia> zipped[2]
+20-element Array{Tuple{String,String},1}:
+ ("PRP", "He")
+ ("MD", "will")
+ ("VB", "be")
+ ("VBN", "replaced")
+ ("IN", "by")
+ ("NNP", "Eliahu")
+ ("NNP", "Ben-Elissar")
+ (",", ",")
+ ("DT", "a")
+ ("JJ", "former")
+ ("JJ", "Israeli")
+ ("NN", "envoy")
+ ("TO", "to")
+ ("NNP", "Egypt")
+ ("CC", "and")
+ ("JJ", "right-wing")
+ ("NNP", "Likud")
+ ("NNP", "party")
+ ("NNP", "politician")
+ (".", ".")
+
+```
+
+Since Part of Speech tagging is done at the sentence level,
+the text of an `AbstractDocument` is split into sentences and then labelled sentence by sentence.
+However, this is not possible for an `NGramDocument`, as its text cannot be recreated.
+For a `TokenDocument`, the text is approximated in order to split it into sentences, hence the following throws a warning when tagging the `Corpus`.
+ +```julia + +julia> crps = Corpus([StringDocument("We aRE vErY ClOSE tO ThE HEaDQuarTeRS."), TokenDocument("this is Bangalore.")]) +A Corpus with 2 documents: + * 1 StringDocument's + * 0 FileDocument's + * 1 TokenDocument's + * 0 NGramDocument's + +Corpus's lexicon contains 0 tokens +Corpus's index contains 0 tokens + +julia> pos(crps) +┌ Warning: TokenDocument's can only approximate the original text +└ @ TextAnalysis ~/.julia/dev/TextAnalysis/src/document.jl:220 +2-element Array{Array{Array{String,1},1},1}: + [["PRP", "VBP", "RB", "JJ", "TO", "DT", "NN", "."]] + [["DT", "VBZ", "NNP", "."]] + +``` diff --git a/test/runtests.jl b/test/runtests.jl index f28f076..2738bfa 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -7,5 +7,6 @@ println("Running tests:") include("crf.jl") include("ner.jl") include("pos.jl") +include("sentiment.jl") include("averagePerceptronTagger.jl") include("ulmfit.jl") From 4d9976d13983f217911fdfdf7686c113fd497f68 Mon Sep 17 00:00:00 2001 From: Adarshkumar712 Date: Mon, 24 May 2021 05:22:55 -0700 Subject: [PATCH 15/23] Correction in make.jl --- docs/make.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/make.jl b/docs/make.jl index a2f72fd..64de323 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -10,7 +10,7 @@ makedocs( "Conditional Random Fields" => "crf.md", "ULMFiT" => "ULMFiT.md", "Named Entity Recognition" => "ner.md", - "Tagging Schemes" => "tagging.md.md", + "Tagging Schemes" => "tagging.md", "Sentiment Analyzer" => "sentiment.md", "API References" => "APIReference.md" ], From 4f3481b3110c9d2270432856780dca32114b27af Mon Sep 17 00:00:00 2001 From: Adarshkumar712 Date: Mon, 24 May 2021 06:19:55 -0700 Subject: [PATCH 16/23] Update docs of crf and ULMFiT --- docs/src/ULMFiT.md | 216 +++++++++++++++++++++++---------------------- docs/src/crf.md | 31 +++---- 2 files changed, 125 insertions(+), 122 deletions(-) diff --git a/docs/src/ULMFiT.md b/docs/src/ULMFiT.md index 332e2fd..89622d1 100644 --- a/docs/src/ULMFiT.md +++ b/docs/src/ULMFiT.md @@ -18,37 +18,38 @@ Default data loaders are provided in the `data_loaders.jl`: In this step, Language Model will learn the general properties of the Language. To train the model we need a general domain corpus like WikiText-103. For training, a `generator` function is provided to create a `Channel` which will give mini-batch in every call. 
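Conceptually, such a generator walks over the token stream and, call by call, emits a `batchsize × bptt` slice of input tokens followed by the corresponding next-token targets. The sketch below illustrates the idea only — it is not the package's `generator` implementation, and the helper name `toy_generator` is made up:

```julia
# Illustrative sketch of a bptt-style batch generator (not the package's actual `generator`).
function toy_generator(ch::Channel, corpus::Vector{<:AbstractString}; batchsize = 4, bptt = 10)
    len  = div(length(corpus), batchsize)                       # tokens per batch column
    cols = [corpus[(i-1)*len+1 : i*len] for i in 1:batchsize]   # split the stream into batchsize columns
    n_steps = div(len - 1, bptt)
    put!(ch, n_steps)                                           # the first take! returns the number of X/Y pairs
    for s in 1:n_steps
        lo = (s - 1) * bptt + 1
        X = [[cols[b][lo + t]     for b in 1:batchsize] for t in 0:bptt-1]  # tokens at each time-step
        Y = [[cols[b][lo + t + 1] for b in 1:batchsize] for t in 0:bptt-1]  # next-token targets
        put!(ch, X)                                             # one call gives X ...
        put!(ch, Y)                                             # ... and the next gives Y
    end
end

corpus  = split("the quick brown fox jumps over the lazy dog " ^ 20)
loader  = Channel(ch -> toy_generator(ch, corpus; batchsize = 4, bptt = 5))
n_pairs = take!(loader)
X, Y    = take!(loader), take!(loader)
```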
After pre-processing the corpus, the tokenized corpus is given as input to the generator function and the Channel can be created like so: ```julia -julia> loader = Channel(x -> generator(x, corpus; batchsize=4, bptt=10)) -Channel{Any}(sz_max:0,sz_curr:1) +julia> loader = ULMFiT.imdb_fine_tune_data(4, 10) # batchsize=4, bptt=10 +Channel{Any}(0) (1 item available) julia> max_batches = take!(loader) # this is the first call to the loader # These are the subsequent calls in pairs for X and Y -julia> X = take!(Loaders) - 10-element Array{Array{Any,1},1}: - ["senjō", ",", "indicated", "after"] - ["no", "he", ",", "two"] - ["valkyria", "sent", "\"", "games"] - ["3", "a", "i", ","] - [":", "formal", "am", "making"] - ["", "demand", "to", "a"] - ["chronicles", "for", "some", "start"] - ["(", "surrender", "extent", "against"] - ["japanese", "of", "influenced", "the"] - [":", "the", "by", "vancouver"] - -julia> Y = take!(gen) -10-element Array{Array{Any,1},1}: -["no", "he", ",", "two"] -["valkyria", "sent", "\"", "games"] -["3", "a", "i", ","] -[":", "formal", "am", "making"] -["", "demand", "to", "a"] -["chronicles", "for", "some", "start"] -["(", "surrender", "extent", "against"] -["japanese", "of", "influenced", "the"] -[":", "the", "by", "vancouver"] -["戦場のヴァルキュリア", "arsenal", "them", "canucks"] +julia> X = take!(loader) +10-element Vector{Vector{Any}}: + ["i", "transparent", "it", "were"] + ["admit", "villain", "immediately", "all"] + [",", "who", "as", "first"] + ["the", "talks", "she", "rate"] + ["great", "like", "is", "."] + ["majority", "mortimer", "on", "even"] + ["of", "snerd", "for", "veda"] + ["films", "and", "a", "ann"] + ["released", "has", "few", "borg"] + ["before", "an", "seconds", "in"] + +julia> Y = take!(loader) +10-element Vector{Vector{Any}}: + ["admit", "villain", "immediately", "all"] + [",", "who", "as", "first"] + ["the", "talks", "she", "rate"] + ["great", "like", "is", "."] + ["majority", "mortimer", "on", "even"] + ["of", "snerd", "for", "veda"] + ["films", "and", "a", "ann"] + ["released", "has", "few", "borg"] + ["before", "an", "seconds", "in"] + ["say", "office", ",", "a"] + ``` Note that at the first call to this `Channel` the output will be maximum number of batches which it can give. Two calls to this `Channel` completed one batch, that is, it doesnot give `X` and `Y` both together in one call, two calls are needed, one first `X` is given out and in second `Y`. Also, to understand what are `batchsize` and `bptt`, refer this [blog](https://nextjournal.com/ComputerMaestro/jsoc19-practical-implementation-of-ulmfit-in-julia-2). @@ -199,24 +200,24 @@ This is basically a modification to the original LSTM layer. The layer uses [Dro ```julia # maskWi and maskWh are drop masks for Wi and Wh weights -julia> fieldnames(WeightDroppedLSTMCell) +julia> fieldnames(ULMFiT.WeightDroppedLSTMCell) (:Wi, :Wh, :b, :h, :c, :p, :maskWi, :maskWh, :active) # To deine a layer with 4 input size and 5 output size and 0.3 dropping probability -julia> wd = WeightDroppedLSTM(4, 5, 0.3); +julia> wd = ULMFiT.WeightDroppedLSTM(4, 5, 0.3); # Pass julia> x = rand(4); julia> h = wd(x) -Tracked 5-element Array{Float64,1}: - 0.06149460838123775 - -0.06028818475111407 - 0.07400426274491535 - -0.20671647527394219 - -0.00678279380721769 +5×1 Matrix{Float64}: + 0.17602923394922002 + 0.08615001440875035 + 0.015924513976372016 + 0.10526862977034518 + -0.04417581280319146 # To reset_masks! 
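# (reset_masks! draws fresh DropConnect masks for maskWi and maskWh, so the next
#  forward pass uses a new dropout pattern)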
-julia> reset_masks!(wd) +julia> ULMFiT.reset_masks!(wd) ``` ### Averaged-SGD LSTM (AWD_LSTM) @@ -226,63 +227,63 @@ This is a regular LSTM layer with Variational DropConnect and weights averaging ```julia # `accum` field is used to store the sum of weights for every iteration after trigger # to get average of the weights for every subsequent iteration -julia> fieldnames(AWD_LSTM) +julia> fieldnames(ULMFiT.AWD_LSTM) (:layer, :T, :accum) -julia> awd = AWD_LSTM(3, 4, 0.5) +julia> awd = ULMFiT.AWD_LSTM(3, 4, 0.5) # Setting trigger iteration -julia> set_trigger!(1000, awd) +julia> ULMFiT.set_trigger!(1000, awd) julia> awd.T 1000 # Pass -julia> x = rand(3) +julia> x = rand(3); julia> h = awd(x) -Tracked 4-element Array{Float64,1}: - -0.0751824486756288 - -0.3061227967356536 - -0.030079860137667995 - -0.09833401074779546 +4×1 Matrix{Float64}: + 0.15229648590284084 + -0.05929450272853615 + -0.06110043118692251 + 0.15302430271141032 # Resetting drop masks - julia> awd.layer.cell.maskWi - 16×3 Array{Float32,2}: - 0.0 2.0 2.0 - 2.0 2.0 2.0 +julia> awd.layer.cell.maskWi +16×3 Matrix{Float32}: + 0.0 0.0 0.0 + 2.0 0.0 0.0 0.0 2.0 0.0 - 0.0 0.0 2.0 - 0.0 0.0 2.0 - 2.0 2.0 2.0 + 0.0 0.0 0.0 2.0 2.0 2.0 - 0.0 2.0 2.0 0.0 2.0 0.0 2.0 0.0 2.0 + 2.0 2.0 2.0 + 2.0 0.0 0.0 0.0 0.0 2.0 - 0.0 2.0 2.0 + 2.0 0.0 0.0 2.0 0.0 2.0 0.0 2.0 0.0 0.0 2.0 0.0 - 2.0 0.0 2.0 + 2.0 2.0 2.0 + 2.0 2.0 2.0 - julia> reset_masks!(awd) - julia> awd.layer.cell.maskWi - 16×3 Array{Float32,2}: +julia> ULMFiT.reset_masks!(awd) +julia> awd.layer.cell.maskWi +16×3 Matrix{Float32}: 0.0 2.0 0.0 - 0.0 0.0 0.0 - 2.0 0.0 0.0 0.0 2.0 0.0 + 0.0 0.0 0.0 2.0 2.0 0.0 2.0 2.0 2.0 - 2.0 2.0 0.0 - 2.0 2.0 0.0 2.0 2.0 2.0 + 0.0 2.0 0.0 + 2.0 2.0 0.0 + 2.0 0.0 2.0 0.0 0.0 2.0 2.0 0.0 0.0 2.0 2.0 2.0 - 2.0 2.0 2.0 0.0 0.0 2.0 - 0.0 2.0 0.0 + 0.0 2.0 2.0 + 2.0 0.0 2.0 0.0 0.0 2.0 ``` @@ -291,33 +292,34 @@ Tracked 4-element Array{Float64,1}: This layer applis Variational-DropOut, which is, using same dropout mask till it is not specified to change or till a pass is over. This dropout is useful for recurrent layers since these layers perform better if same mask is used for all time-steps (pass) instead of using different for every timestep. [Refer [this](https://arxiv.org/pdf/1506.02557.pdf) paper for more details]. This layer saves the masks after generation till it is not specified to change. To change the mask use `reset_masks!` function. 
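For intuition, the "variational" part simply means the dropout mask is sampled once and then reused at every time-step of the pass, scaled by `1/(1 - p)` so that expectations are preserved. A rough stand-alone sketch of that idea (illustrative only, not the package's code):

```julia
# Variational dropout in a nutshell (illustrative only).
p    = 0.5
h    = 4                                              # hidden size
mask = (rand(Float32, h) .> p) ./ Float32(1 - p)      # sampled once: zeros and 1/(1-p) entries
xs   = [rand(Float32, h) for t in 1:3]                # a toy 3-step sequence
ys   = [x .* mask for x in xs]                        # the same mask is applied at every time-step
```

The `VarDrop` layer wraps exactly this behaviour, keeping the mask until `reset_masks!` is called: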
```julia -julia> vd = VarDrop(0.5) -VarDrop{Float64}(0.5, Array{Float32}(0,0), true, true) +julia> vd = ULMFiT.VarDrop(0.5) +VarDrop{Float64}(0.5, Matrix{Float32}(undef, 0, 0), true, true) # No mask generation will nothing is passed julia> vd.mask -0×0 Array{Float32,2} +0×0 Matrix{Float32} julia> x = rand(4,5) -4×5 Array{Float64,2}: - 0.480531 0.556341 0.228134 0.439411 0.137296 - 0.541459 0.118603 0.448941 0.568478 0.0440091 - 0.491735 0.55232 0.857768 0.729287 0.842753 - 0.33523 0.0378036 0.491757 0.00710462 0.374096 - - julia> x = vd(x) - 4×5 Array{Float64,2}: - 0.961062 1.11268 0.0 0.0 0.274592 - 1.08292 0.0 0.897881 0.0 0.0880182 - 0.98347 0.0 0.0 1.45857 1.68551 - 0.67046 0.0756071 0.983514 0.0142092 0.0 - - julia> vd.mask - 4×5 Array{Float64,2}: - 2.0 2.0 0.0 0.0 2.0 - 2.0 0.0 2.0 0.0 2.0 - 2.0 0.0 0.0 2.0 2.0 - 2.0 2.0 2.0 2.0 0.0 +4×5 Matrix{Float64}: + 0.383492 0.914917 0.616324 0.940116 0.526015 + 0.286494 0.35078 0.320465 0.334261 0.295965 + 0.232206 0.26289 0.940569 0.23259 0.675406 + 0.152903 0.934304 0.125803 0.727792 0.239359 + +julia> x = vd(x) +4×5 Matrix{Float64}: + 0.0 0.0 0.0 1.88023 1.05203 + 0.0 0.0 0.64093 0.668522 0.591929 + 0.464413 0.0 1.88114 0.0 0.0 + 0.0 0.0 0.0 0.0 0.478717 + +julia> vd.mask +4×5 Matrix{Float64}: + 0.0 0.0 0.0 2.0 2.0 + 0.0 0.0 2.0 2.0 2.0 + 2.0 0.0 2.0 0.0 0.0 + 0.0 0.0 0.0 0.0 2.0 + ``` ### Dropped Embeddings (DroppedEmbeddings) @@ -325,35 +327,35 @@ julia> x = rand(4,5) This layer is an embedding layer which can work in two ways either to give embeddings Vectors for the given indices of words in vocabulary or can be used to get probability distribution for all the words of vocabulary with softmax layer, which is also called as weight-tying. Here, it can be used to tie weights of the embedding layer and the last softmax layer. In addition to this, it also dropped embeddings for words randomly for given probability of dropping, in other words, it puts whole embedding vector of randomly selects to vector of zeros. Here, the mask used for the dropping posses variational property, that is, it cannot be changed till it is not specified to change or generate a new drop mask. `reset_masks!` should be used to reset the mask. ```julia -julia> fieldnames(DroppedEmbeddings) +julia> fieldnames(ULMFiT.DroppedEmbeddings) (:emb, :p, :mask, :active) -julia> de = DroppedEmbeddings(5, 2, 0.3) +julia> de = ULMFiT.DroppedEmbeddings(5, 2, 0.3); # Pass -julia> x = [4,2,1] +julia> x = [4,2,1]; julia> embeddings = de(x) -Tracked 2×3 LinearAlgebra.Transpose{Float32,Array{Float32,2}}: - 0.86327 0.537614 -0.0 - 0.152131 -0.541008 -0.0 +2×3 transpose(::Matrix{Float32}) with eltype Float32: + 0.363157 -0.0246867 -0.332342 + -0.553211 -0.594884 0.184288 - julia> de.mask - 5-element Array{Float32,1}: - 0.0 +julia> de.mask +5-element Vector{Float32}: + 1.4285715 1.4285715 1.4285715 1.4285715 1.4285715 - # reset mask - julia> reset_masks!(de) - julia> de.mask - 5-element Array{Float32,1}: - 0.0 +# reset mask +julia> reset_masks!(de) +julia> de.mask +5-element Vector{Float32}: 1.4285715 1.4285715 - 0.0 1.4285715 + 0.0 + 0.0 ``` ### Concat-Pooled Dense layer @@ -362,13 +364,13 @@ This is a simple modification to the original `Dense` layer for recurrent networ ```julia # The first argument is the length of the output Vector of the preceding RNN layer to this layer. 
Also, by default if uses identity activation, it can be changed by giving desired activaiton as the third argument -julia> pd = PooledDense(4, 3) +julia> pd = ULMFiT.PooledDense(4, 3) # Pass -julia> X = [rand(4), rand(4), rand(4)] +julia> X = [rand(4), rand(4), rand(4)]; julia> pd(X) -Tracked 3×1 Array{Float64,2}: - -2.2106991143006036 - -0.9560163708455404 - -0.4770649645417375 +3×1 Matrix{Float64}: + -1.3679283360573462 + 1.1115990254044759 + -0.27398355913859046 ``` diff --git a/docs/src/crf.md b/docs/src/crf.md index 19f958d..af93cbf 100644 --- a/docs/src/crf.md +++ b/docs/src/crf.md @@ -6,18 +6,19 @@ Let us first load the dependencies- using Flux using Flux: onehot, train!, Params, gradient, LSTM, Dense, reset! - using TextAnalysis: CRF, viterbi_decode, crf_loss + using TextModels: CRF, viterbi_decode, crf_loss Conditional Random Field layer is essentially like a softmax that operates on the top most layer. Let us suppose the following input sequence to the CRF with `NUM_LABELS = 2` ```julia +julia> NUM_LABELS = 2 julia> SEQUENCE_LENGTH = 2 # CRFs can handle variable length inputs sequences -julia> input_seq = [rand(NUM_LABELS + 2) for i in 1:SEQUENCE_LENGTH] # NUM_LABELS + 2, where two extra features correspond to the :START and :END label. -2-element Array{Array{Float64,1},1}: - [0.523462, 0.455434, 0.274347, 0.755279] - [0.610991, 0.315381, 0.0863632, 0.693031] +julia> input_seq = [Float32.(rand(NUM_LABELS + 2)) for i in 1:SEQUENCE_LENGTH] # NUM_LABELS + 2, where two extra features correspond to the :START and :END label. +2-element Vector{Vector{Float32}}: + [0.5114323, 0.5355139, 0.4011792, 0.56359255] + [0.22925346, 0.21232551, 0.77616125, 0.41560093] ``` @@ -56,16 +57,16 @@ julia> label_seq3 = [onehot(2, 1:2), onehot(1, 1:2)] julia> label_seq4 = [onehot(2, 1:2), onehot(2, 1:2)] julia> crf_loss(c, input_seq, label_seq1, init_α) -1.9206894963901504 (tracked) +1.33554f0 julia> crf_loss(c, input_seq, label_seq2, init_α) -1.4972745472075206 (tracked) +1.2327178f0 julia> crf_loss(c, input_seq, label_seq3, init_α) -1.543210471592448 (tracked) +1.3454239f0 julia> crf_loss(c, input_seq, label_seq4, init_α) -0.876923329893466 (tracked) +1.6871009f0 ``` @@ -75,9 +76,9 @@ We can decode this using Viterbi Decode. 
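Viterbi decoding is a standard dynamic program over the label lattice: at each position it keeps, for every label, the best score of any path ending in that label, combining the per-position input scores with the transition scores, and then follows backpointers to recover the best sequence. A generic sketch of the idea (illustrative only — this is not the package's `viterbi_decode`, and it ignores the special `:START`/`:END` handling):

```julia
# Generic Viterbi sketch over per-position label scores (illustrative only).
# `emit[t][l]` is the input score of label l at position t; `trans[i, j]` scores the transition i -> j.
function toy_viterbi(emit::Vector{Vector{Float64}}, trans::Matrix{Float64})
    T, L = length(emit), length(emit[1])
    δ    = copy(emit[1])                     # best score of a path ending in each label at position 1
    back = zeros(Int, T, L)                  # backpointers
    for t in 2:T
        δ_new = similar(δ)
        for l in 1:L
            best, arg  = findmax([δ[i] + trans[i, l] for i in 1:L])
            δ_new[l]   = best + emit[t][l]
            back[t, l] = arg
        end
        δ = δ_new
    end
    path = zeros(Int, T)
    path[T] = argmax(δ)
    for t in T-1:-1:1
        path[t] = back[t+1, path[t+1]]
    end
    return path
end

emit  = [[0.5, 0.1], [0.2, 0.9]]             # toy scores: 2 positions, 2 labels
trans = [0.3 0.0; 0.1 0.4]
toy_viterbi(emit, trans)                      # -> [1, 2]
```

`viterbi_decode` plays the same role for the `CRF` layer, additionally accounting for the `:START` and `:END` transitions: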
```julia julia> viterbi_decode(c, input_seq, init_α) # Gives the label_sequence with least loss -2-element Array{Flux.OneHotVector,1}: - [false, true] - [false, true] +2-element Vector{Flux.OneHotArray{UInt32, 2, 0, 1, UInt32}}: + [1, 0] + [0, 1] ``` @@ -96,7 +97,7 @@ CRFs smoothly work over Flux layers- julia> NUM_FEATURES = 20 julia> input_seq = [rand(NUM_FEATURES) for i in 1:SEQUENCE_LENGTH] -2-element Array{Array{Float64,1},1}: +2-element Vector{Vector{Float32}}: [0.948219, 0.719964, 0.352734, 0.0677656, 0.570564, 0.187673, 0.525125, 0.787807, 0.262452, 0.472472, 0.573259, 0.643369, 0.00592054, 0.945258, 0.951466, 0.323156, 0.679573, 0.663285, 0.218595, 0.152846] [0.433295, 0.11998, 0.99615, 0.530107, 0.188887, 0.897213, 0.993726, 0.0799431, 0.953333, 0.941808, 0.982638, 0.0919345, 0.27504, 0.894169, 0.66818, 0.449537, 0.93063, 0.384957, 0.415114, 0.212203] @@ -105,7 +106,7 @@ julia> m1 = Dense(NUM_FEATURES, NUM_LABELS + 2) julia> loss1(input_seq, label_seq) = crf_loss(c, m1.(input_seq), label_seq, init_α) # loss for model m1 julia> loss1(input_seq, [onehot(1, 1:2), onehot(1, 1:2)]) -4.6620379898687485 (tracked) +4.6620379898687485 ``` @@ -124,7 +125,7 @@ julia> m2(x) = dense_out.(lstm.(x)) julia> loss2(input_seq, label_seq) = crf_loss(c, m2(input_seq), label_seq, init_α) # loss for model m2 julia> loss2(input_seq, [onehot(1, 1:2), onehot(1, 1:2)]) -1.6501050910529504 (tracked) +1.6501050910529504 julia> reset!(lstm) ``` From 24fa0d8cc2a032ea8e3feca8c346b6c2eb53b993 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 24 May 2021 14:51:26 +0000 Subject: [PATCH 17/23] CompatHelper: bump compat for "BSON" to "0.3" --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 4687488..df48a3a 100644 --- a/Project.toml +++ b/Project.toml @@ -24,7 +24,7 @@ Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" WordTokenizers = "796a5d58-b03d-544a-977e-18100b691f6e" [compat] -BSON = "0.2.5" +BSON = "0.2.5, 0.3" DataDeps = "0.7" DataStructures = "0.17, 0.18" Flux = "0.9" From 078e3888e0235b3ce16839f24aaa921399094698 Mon Sep 17 00:00:00 2001 From: Adarshkumar712 Date: Thu, 1 Jul 2021 00:55:43 -0700 Subject: [PATCH 18/23] Minor Corrections --- src/ULMFiT/custom_layers.jl | 12 +++++++++--- src/ULMFiT/fine_tune_lm.jl | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/ULMFiT/custom_layers.jl b/src/ULMFiT/custom_layers.jl index c0275a6..d83c43c 100644 --- a/src/ULMFiT/custom_layers.jl +++ b/src/ULMFiT/custom_layers.jl @@ -63,7 +63,7 @@ function WeightDroppedLSTMCell(in::Integer, out::Integer, p::Float64=0.0; init(out*4, in), init(out*4, out), init(out*4), - reshape(zeros(Float32, out),out, 1), + reshape(zeros(Float32, out), out, 1), reshape(zeros(Float32, out), out, 1), p, drop_mask((out*4, in), p), @@ -112,9 +112,15 @@ function WeightDroppedLSTM(a...; kw...) return Flux.Recur(cell, hidden) end -# over definition for reset! to work with pretrained model +""" + reset!(m) + +Resets the h, c parameters of the LSTM Cell. + +For more refer [`Flux.reset`](@ref https://fluxml.ai/Flux.jl/stable/models/layers/#Flux.reset!) 
+""" function reset!(m) - try + try # to accomodate the definition in previously trained Language Model (m.state = (m.cell.h, m.cell.c)) catch Flux.reset!(m) diff --git a/src/ULMFiT/fine_tune_lm.jl b/src/ULMFiT/fine_tune_lm.jl index b2e7261..22a08d3 100644 --- a/src/ULMFiT/fine_tune_lm.jl +++ b/src/ULMFiT/fine_tune_lm.jl @@ -47,7 +47,7 @@ end epochs::Integer=1, checkpoint_itvl::Integer=5000) This function contains main training loops for fine-tuning the language model. -To use this funciton, an instance of LanguageModel and a data loader is needed. +To use this function, an instance of LanguageModel and a data loader is needed. Read the docs for more info about arguments """ function fine_tune_lm!(lm=LanguageModel(), data_loader=imdb_fine_tune_data, From 7b2b460899d2fa22213c2ae8296833a7e8b7ca27 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 11 Jul 2021 15:39:27 +0000 Subject: [PATCH 19/23] CompatHelper: add new compat entry for "CUDA" at version "3" --- Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Project.toml b/Project.toml index e8a08dd..bbfe4e7 100644 --- a/Project.toml +++ b/Project.toml @@ -25,6 +25,7 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] BSON = "0.3.3" +CUDA = "3" DataStructures = "0.18.9" Flux = "0.12.2" JSON = "0.21.1" From 53ee374f1df4818e0f3a8ecc813275bba90ac025 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 11 Jul 2021 15:39:31 +0000 Subject: [PATCH 20/23] CompatHelper: add new compat entry for "DataDeps" at version "0.7" --- Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Project.toml b/Project.toml index e8a08dd..0580cde 100644 --- a/Project.toml +++ b/Project.toml @@ -25,6 +25,7 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] BSON = "0.3.3" +DataDeps = "0.7" DataStructures = "0.18.9" Flux = "0.12.2" JSON = "0.21.1" From 3b8b81c9359a9a8990332d35cf275595566ecf9a Mon Sep 17 00:00:00 2001 From: Konstantinos Samaras-Tsakiris Date: Sun, 18 Jul 2021 13:22:54 +0200 Subject: [PATCH 21/23] Fix broken docs link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index afa0f57..4b63f6e 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ A Julia package for working with text. ## Introduction -The TextModels package enhances the TextAnalysis package with end-user focussed, practical natural language models, typically based on neural networks (in this case, [Flux](https://fluxml.ai/)). Please see the [documentation](https://juliatext.github.io/TextAnalysis.jl/latest) for more. +The TextModels package enhances the TextAnalysis package with end-user focussed, practical natural language models, typically based on neural networks (in this case, [Flux](https://fluxml.ai/)). Please see the [documentation](https://juliahub.com/docs/TextModels) for more. 
- **License** : [MIT License](https://github.com/JuliaText/TextAnalysis.jl/blob/master/LICENSE.md) From fd2fd328241cf57b0a8a4c77caf30c285b30614d Mon Sep 17 00:00:00 2001 From: Avik Sengupta Date: Mon, 6 Dec 2021 00:06:21 +0000 Subject: [PATCH 22/23] CorpusLoader compat --- Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Project.toml b/Project.toml index 3e434c8..f83804a 100644 --- a/Project.toml +++ b/Project.toml @@ -37,6 +37,7 @@ TextAnalysis = "0.7.3" WordTokenizers = "0.5.6" Zygote = "0.6.10" julia = "1.6" +CorpusLoaders = "0.3" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" From 66ec1fe47c23966fe0b3e1d0bfa207963963cdb8 Mon Sep 17 00:00:00 2001 From: Avik Sengupta Date: Fri, 10 Dec 2021 18:29:44 -0500 Subject: [PATCH 23/23] Flux update --- Project.toml | 6 +++--- src/sequence/sequence_models.jl | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index f83804a..bbe2f84 100644 --- a/Project.toml +++ b/Project.toml @@ -25,10 +25,11 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] BSON = "0.3.3" -DataDeps = "0.7" CUDA = "3" +CorpusLoaders = "0.3" +DataDeps = "0.7" DataStructures = "0.18.9" -Flux = "0.12.2" +Flux = "0.12.8" JSON = "0.21.1" Languages = "0.4.3" NNlib = "0.7" @@ -37,7 +38,6 @@ TextAnalysis = "0.7.3" WordTokenizers = "0.5.6" Zygote = "0.6.10" julia = "1.6" -CorpusLoaders = "0.3" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/src/sequence/sequence_models.jl b/src/sequence/sequence_models.jl index 8c8a6df..8b4a3a6 100644 --- a/src/sequence/sequence_models.jl +++ b/src/sequence/sequence_models.jl @@ -79,6 +79,8 @@ function BiLSTM_CNN_CRF_Model(labels, chars_idx, words_idx, UNK_char_idx,UNK_Wor (1, 1), # stride (0, 2), # pad (1, 1), # dilation + 1 # groups + ) BiLSTM_CNN_CRF_Model(labels, chars_idx, words_idx, conv1, W_Char_Embed, W_word_Embed,