Skip to content
Snippets Groups Projects
Commit 83bec349 authored by Florian Atteneder's avatar Florian Atteneder
Browse files

accelerate dg rhs evaluation using LoopVectorization.jl

parent 82a0e637
Branches fa/opt-dg-rhs
No related tags found
No related merge requests found
# This file is machine-generated - editing it directly is not advised # This file is machine-generated - editing it directly is not advised
julia_version = "1.9.1" julia_version = "1.9.3"
manifest_format = "2.0" manifest_format = "2.0"
project_hash = "23b088bdd6df8e8419a0902b8cafd14f76c1ffbf" project_hash = "211163242c6c90535eae175929c4423583770490"
[[deps.Adapt]] [[deps.Adapt]]
deps = ["LinearAlgebra"] deps = ["LinearAlgebra"]
...@@ -14,6 +14,34 @@ version = "3.4.0" ...@@ -14,6 +14,34 @@ version = "3.4.0"
uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
version = "1.1.1" version = "1.1.1"
[[deps.ArrayInterface]]
deps = ["Adapt", "LinearAlgebra", "Requires", "SparseArrays", "SuiteSparse"]
git-tree-sha1 = "f83ec24f76d4c8f525099b2ac475fc098138ec31"
uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
version = "7.4.11"
[deps.ArrayInterface.extensions]
ArrayInterfaceBandedMatricesExt = "BandedMatrices"
ArrayInterfaceBlockBandedMatricesExt = "BlockBandedMatrices"
ArrayInterfaceCUDAExt = "CUDA"
ArrayInterfaceGPUArraysCoreExt = "GPUArraysCore"
ArrayInterfaceStaticArraysCoreExt = "StaticArraysCore"
ArrayInterfaceTrackerExt = "Tracker"
[deps.ArrayInterface.weakdeps]
BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"
Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c"
[[deps.ArrayInterfaceCore]]
deps = ["LinearAlgebra", "SnoopPrecompile", "SparseArrays", "SuiteSparse"]
git-tree-sha1 = "e5f08b5689b1aad068e01751889f2f615c7db36d"
uuid = "30b0a656-2188-435a-8636-2ec0e6a096e2"
version = "0.1.29"
[[deps.Artifacts]] [[deps.Artifacts]]
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
...@@ -37,12 +65,24 @@ git-tree-sha1 = "43b1a4a8f797c1cddadf60499a8a077d4af2cd2d" ...@@ -37,12 +65,24 @@ git-tree-sha1 = "43b1a4a8f797c1cddadf60499a8a077d4af2cd2d"
uuid = "d1d4a3ce-64b1-5f1a-9ba4-7e7e69966f35" uuid = "d1d4a3ce-64b1-5f1a-9ba4-7e7e69966f35"
version = "0.1.7" version = "0.1.7"
[[deps.BitTwiddlingConvenienceFunctions]]
deps = ["Static"]
git-tree-sha1 = "0c5f81f47bbbcf4aea7b2959135713459170798b"
uuid = "62783981-4cbd-42fc-bca8-16325de8dc4b"
version = "0.1.5"
[[deps.Bzip2_jll]] [[deps.Bzip2_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "19a35467a82e236ff51bc17a3a44b69ef35185a2" git-tree-sha1 = "19a35467a82e236ff51bc17a3a44b69ef35185a2"
uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0" uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0"
version = "1.0.8+0" version = "1.0.8+0"
[[deps.CPUSummary]]
deps = ["CpuId", "IfElse", "PrecompileTools", "Static"]
git-tree-sha1 = "89e0654ed8c7aebad6d5ad235d6242c2d737a928"
uuid = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9"
version = "0.2.3"
[[deps.Cairo_jll]] [[deps.Cairo_jll]]
deps = ["Artifacts", "Bzip2_jll", "CompilerSupportLibraries_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "JLLWrappers", "LZO_jll", "Libdl", "Pixman_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll", "Zlib_jll", "libpng_jll"] deps = ["Artifacts", "Bzip2_jll", "CompilerSupportLibraries_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "JLLWrappers", "LZO_jll", "Libdl", "Pixman_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll", "Zlib_jll", "libpng_jll"]
git-tree-sha1 = "4b859a208b2397a7a623a03449e4636bdb17bcf2" git-tree-sha1 = "4b859a208b2397a7a623a03449e4636bdb17bcf2"
...@@ -61,6 +101,12 @@ git-tree-sha1 = "38f7a08f19d8810338d4f5085211c7dfa5d5bdd8" ...@@ -61,6 +101,12 @@ git-tree-sha1 = "38f7a08f19d8810338d4f5085211c7dfa5d5bdd8"
uuid = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" uuid = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0"
version = "0.1.4" version = "0.1.4"
[[deps.CloseOpenIntervals]]
deps = ["Static", "StaticArrayInterface"]
git-tree-sha1 = "70232f82ffaab9dc52585e0dd043b5e0c6b714f1"
uuid = "fb6a15b2-703c-40df-9091-08a04967cfa9"
version = "0.1.12"
[[deps.CodecZlib]] [[deps.CodecZlib]]
deps = ["TranscodingStreams", "Zlib_jll"] deps = ["TranscodingStreams", "Zlib_jll"]
git-tree-sha1 = "ded953804d019afa9a3f98981d99b33e3db7b6da" git-tree-sha1 = "ded953804d019afa9a3f98981d99b33e3db7b6da"
...@@ -105,7 +151,7 @@ version = "4.5.0" ...@@ -105,7 +151,7 @@ version = "4.5.0"
[[deps.CompilerSupportLibraries_jll]] [[deps.CompilerSupportLibraries_jll]]
deps = ["Artifacts", "Libdl"] deps = ["Artifacts", "Libdl"]
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
version = "1.0.2+0" version = "1.0.5+0"
[[deps.ConstructionBase]] [[deps.ConstructionBase]]
deps = ["LinearAlgebra"] deps = ["LinearAlgebra"]
...@@ -118,6 +164,12 @@ git-tree-sha1 = "d05d9e7b7aedff4e5b51a029dced05cfb6125781" ...@@ -118,6 +164,12 @@ git-tree-sha1 = "d05d9e7b7aedff4e5b51a029dced05cfb6125781"
uuid = "d38c429a-6771-53c6-b99e-75d170b6e991" uuid = "d38c429a-6771-53c6-b99e-75d170b6e991"
version = "0.6.2" version = "0.6.2"
[[deps.CpuId]]
deps = ["Markdown"]
git-tree-sha1 = "fcbb72b032692610bfbdb15018ac16a36cf2e406"
uuid = "adafc99b-e345-5852-983c-f28acb93d879"
version = "0.3.1"
[[deps.Crayons]] [[deps.Crayons]]
git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15" git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15"
uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f"
...@@ -312,6 +364,17 @@ git-tree-sha1 = "129acf094d168394e80ee1dc4bc06ec835e510a3" ...@@ -312,6 +364,17 @@ git-tree-sha1 = "129acf094d168394e80ee1dc4bc06ec835e510a3"
uuid = "2e76f6c2-a576-52d4-95c1-20adfe4de566" uuid = "2e76f6c2-a576-52d4-95c1-20adfe4de566"
version = "2.8.1+1" version = "2.8.1+1"
[[deps.HostCPUFeatures]]
deps = ["BitTwiddlingConvenienceFunctions", "IfElse", "Libdl", "Static"]
git-tree-sha1 = "eb8fed28f4994600e29beef49744639d985a04b2"
uuid = "3e5b6fbb-0976-4d2c-9146-d79de83f2fb0"
version = "0.1.16"
[[deps.IfElse]]
git-tree-sha1 = "debdd00ffef04665ccbb3e150747a77560e8fad1"
uuid = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
version = "0.1.1"
[[deps.IniFile]] [[deps.IniFile]]
git-tree-sha1 = "f550e6e32074c939295eb5ea6de31849ac2c9625" git-tree-sha1 = "f550e6e32074c939295eb5ea6de31849ac2c9625"
uuid = "83e8ac13-25f8-5344-8a64-a9f2b223428f" uuid = "83e8ac13-25f8-5344-8a64-a9f2b223428f"
...@@ -414,6 +477,12 @@ git-tree-sha1 = "ab9aa169d2160129beb241cb2750ca499b4e90e9" ...@@ -414,6 +477,12 @@ git-tree-sha1 = "ab9aa169d2160129beb241cb2750ca499b4e90e9"
uuid = "23fbe1c1-3f47-55db-b15f-69d7ec21a316" uuid = "23fbe1c1-3f47-55db-b15f-69d7ec21a316"
version = "0.15.17" version = "0.15.17"
[[deps.LayoutPointers]]
deps = ["ArrayInterface", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static", "StaticArrayInterface"]
git-tree-sha1 = "88b8f66b604da079a627b6fb2860d3704a6729a1"
uuid = "10f19ff3-798f-405d-979b-55457f8fc047"
version = "0.1.14"
[[deps.LazyArtifacts]] [[deps.LazyArtifacts]]
deps = ["Artifacts", "Pkg"] deps = ["Artifacts", "Pkg"]
uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
...@@ -513,12 +582,32 @@ git-tree-sha1 = "cedb76b37bc5a6c702ade66be44f831fa23c681e" ...@@ -513,12 +582,32 @@ git-tree-sha1 = "cedb76b37bc5a6c702ade66be44f831fa23c681e"
uuid = "e6f89c97-d47a-5376-807f-9c37f3926c36" uuid = "e6f89c97-d47a-5376-807f-9c37f3926c36"
version = "1.0.0" version = "1.0.0"
[[deps.LoopVectorization]]
deps = ["ArrayInterface", "ArrayInterfaceCore", "CPUSummary", "CloseOpenIntervals", "DocStringExtensions", "HostCPUFeatures", "IfElse", "LayoutPointers", "LinearAlgebra", "OffsetArrays", "PolyesterWeave", "PrecompileTools", "SIMDTypes", "SLEEFPirates", "Static", "StaticArrayInterface", "ThreadingUtilities", "UnPack", "VectorizationBase"]
git-tree-sha1 = "c88a4afe1703d731b1c4fdf4e3c7e77e3b176ea2"
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
version = "0.12.165"
[deps.LoopVectorization.extensions]
ForwardDiffExt = ["ChainRulesCore", "ForwardDiff"]
SpecialFunctionsExt = "SpecialFunctions"
[deps.LoopVectorization.weakdeps]
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
[[deps.MacroTools]] [[deps.MacroTools]]
deps = ["Markdown", "Random"] deps = ["Markdown", "Random"]
git-tree-sha1 = "42324d08725e200c23d4dfb549e0d5d89dede2d2" git-tree-sha1 = "42324d08725e200c23d4dfb549e0d5d89dede2d2"
uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
version = "0.5.10" version = "0.5.10"
[[deps.ManualMemory]]
git-tree-sha1 = "bcaef4fc7a0cfe2cba636d84cda54b5e4e4ca3cd"
uuid = "d125e4d3-2237-4719-b19c-fa641b8a4667"
version = "0.1.8"
[[deps.Markdown]] [[deps.Markdown]]
deps = ["Base64"] deps = ["Base64"]
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
...@@ -656,7 +745,7 @@ version = "0.40.1+0" ...@@ -656,7 +745,7 @@ version = "0.40.1+0"
[[deps.Pkg]] [[deps.Pkg]]
deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
version = "1.9.0" version = "1.9.2"
[[deps.PlotThemes]] [[deps.PlotThemes]]
deps = ["PlotUtils", "Statistics"] deps = ["PlotUtils", "Statistics"]
...@@ -676,12 +765,24 @@ git-tree-sha1 = "02ecc6a3427e7edfff1cebcf66c1f93dd77760ec" ...@@ -676,12 +765,24 @@ git-tree-sha1 = "02ecc6a3427e7edfff1cebcf66c1f93dd77760ec"
uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
version = "1.38.1" version = "1.38.1"
[[deps.PolyesterWeave]]
deps = ["BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "Static", "ThreadingUtilities"]
git-tree-sha1 = "240d7170f5ffdb285f9427b92333c3463bf65bf6"
uuid = "1d0040c9-8b98-4ee7-8388-3f51789ca0ad"
version = "0.2.1"
[[deps.Polynomials]] [[deps.Polynomials]]
deps = ["Intervals", "LinearAlgebra", "MutableArithmetics", "RecipesBase"] deps = ["Intervals", "LinearAlgebra", "MutableArithmetics", "RecipesBase"]
git-tree-sha1 = "a1f7f4e41404bed760213ca01d7f384319f717a5" git-tree-sha1 = "a1f7f4e41404bed760213ca01d7f384319f717a5"
uuid = "f27b6e38-b328-58d1-80ce-0feddd5e7a45" uuid = "f27b6e38-b328-58d1-80ce-0feddd5e7a45"
version = "2.0.25" version = "2.0.25"
[[deps.PrecompileTools]]
deps = ["Preferences"]
git-tree-sha1 = "03b4c25b43cb84cee5c90aa9b5ea0a78fd848d2f"
uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
version = "1.2.0"
[[deps.Preferences]] [[deps.Preferences]]
deps = ["TOML"] deps = ["TOML"]
git-tree-sha1 = "47e5f437cc0e7ef2ce8406ce1e7e24d44915f88d" git-tree-sha1 = "47e5f437cc0e7ef2ce8406ce1e7e24d44915f88d"
...@@ -767,6 +868,17 @@ version = "2.0.8" ...@@ -767,6 +868,17 @@ version = "2.0.8"
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
version = "0.7.0" version = "0.7.0"
[[deps.SIMDTypes]]
git-tree-sha1 = "330289636fb8107c5f32088d2741e9fd7a061a5c"
uuid = "94e857df-77ce-4151-89e5-788b33177be4"
version = "0.1.0"
[[deps.SLEEFPirates]]
deps = ["IfElse", "Static", "VectorizationBase"]
git-tree-sha1 = "4b8586aece42bee682399c4c4aee95446aa5cd19"
uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa"
version = "0.6.39"
[[deps.Scratch]] [[deps.Scratch]]
deps = ["Dates"] deps = ["Dates"]
git-tree-sha1 = "f94f779c94e58bf9ea243e77a37e16d9de9126bd" git-tree-sha1 = "f94f779c94e58bf9ea243e77a37e16d9de9126bd"
...@@ -821,6 +933,23 @@ git-tree-sha1 = "5d65101b2ed17a8862c4c05639c3ddc7f3d791e1" ...@@ -821,6 +933,23 @@ git-tree-sha1 = "5d65101b2ed17a8862c4c05639c3ddc7f3d791e1"
uuid = "276daf66-3868-5448-9aa4-cd146d93841b" uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
version = "1.8.7" version = "1.8.7"
[[deps.Static]]
deps = ["IfElse"]
git-tree-sha1 = "f295e0a1da4ca425659c57441bcb59abb035a4bc"
uuid = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"
version = "0.8.8"
[[deps.StaticArrayInterface]]
deps = ["ArrayInterface", "Compat", "IfElse", "LinearAlgebra", "Requires", "SnoopPrecompile", "SparseArrays", "Static", "SuiteSparse"]
git-tree-sha1 = "33040351d2403b84afce74dae2e22d3f5b18edcb"
uuid = "0d7ed370-da01-4f52-bd93-41d350b8b718"
version = "1.4.0"
weakdeps = ["OffsetArrays", "StaticArrays"]
[deps.StaticArrayInterface.extensions]
StaticArrayInterfaceOffsetArraysExt = "OffsetArrays"
StaticArrayInterfaceStaticArraysExt = "StaticArrays"
[[deps.StaticArrays]] [[deps.StaticArrays]]
deps = ["LinearAlgebra", "Random", "StaticArraysCore", "Statistics"] deps = ["LinearAlgebra", "Random", "StaticArraysCore", "Statistics"]
git-tree-sha1 = "6954a456979f23d05085727adb17c4551c19ecd1" git-tree-sha1 = "6954a456979f23d05085727adb17c4551c19ecd1"
...@@ -860,6 +989,10 @@ git-tree-sha1 = "b03a3b745aa49b566f128977a7dd1be8711c5e71" ...@@ -860,6 +989,10 @@ git-tree-sha1 = "b03a3b745aa49b566f128977a7dd1be8711c5e71"
uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"
version = "0.6.14" version = "0.6.14"
[[deps.SuiteSparse]]
deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"]
uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9"
[[deps.SuiteSparse_jll]] [[deps.SuiteSparse_jll]]
deps = ["Artifacts", "Libdl", "Pkg", "libblastrampoline_jll"] deps = ["Artifacts", "Libdl", "Pkg", "libblastrampoline_jll"]
uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c"
...@@ -897,6 +1030,12 @@ version = "0.1.1" ...@@ -897,6 +1030,12 @@ version = "0.1.1"
deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[[deps.ThreadingUtilities]]
deps = ["ManualMemory"]
git-tree-sha1 = "eda08f7e9818eb53661b3deb74e3159460dfbc27"
uuid = "8290d209-cae3-49c0-8002-c8c24d57dab5"
version = "0.5.2"
[[deps.TimeZones]] [[deps.TimeZones]]
deps = ["Dates", "Downloads", "InlineStrings", "LazyArtifacts", "Mocking", "Printf", "RecipesBase", "Scratch", "Unicode"] deps = ["Dates", "Downloads", "InlineStrings", "LazyArtifacts", "Mocking", "Printf", "RecipesBase", "Scratch", "Unicode"]
git-tree-sha1 = "a92ec4466fc6e3dd704e2668b5e7f24add36d242" git-tree-sha1 = "a92ec4466fc6e3dd704e2668b5e7f24add36d242"
...@@ -953,6 +1092,12 @@ git-tree-sha1 = "c2d0db3ef09f1942d08ea455a9e252594be5f3b6" ...@@ -953,6 +1092,12 @@ git-tree-sha1 = "c2d0db3ef09f1942d08ea455a9e252594be5f3b6"
uuid = "4004b06d-e244-455f-a6ce-a5f9919cc534" uuid = "4004b06d-e244-455f-a6ce-a5f9919cc534"
version = "1.0.1" version = "1.0.1"
[[deps.VectorizationBase]]
deps = ["ArrayInterface", "CPUSummary", "HostCPUFeatures", "IfElse", "LayoutPointers", "Libdl", "LinearAlgebra", "SIMDTypes", "Static", "StaticArrayInterface"]
git-tree-sha1 = "b182207d4af54ac64cbc71797765068fdeff475d"
uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
version = "0.21.64"
[[deps.Wayland_jll]] [[deps.Wayland_jll]]
deps = ["Artifacts", "Expat_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg", "XML2_jll"] deps = ["Artifacts", "Expat_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg", "XML2_jll"]
git-tree-sha1 = "ed8d92d9774b077c53e1da50fd81a36af3744c1c" git-tree-sha1 = "ed8d92d9774b077c53e1da50fd81a36af3744c1c"
......
...@@ -14,6 +14,7 @@ Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" ...@@ -14,6 +14,7 @@ Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59"
Jacobi = "83f21c0b-4282-5fbc-9e3f-f6da3d2e584c" Jacobi = "83f21c0b-4282-5fbc-9e3f-f6da3d2e584c"
LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a" Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
......
...@@ -8,6 +8,7 @@ using InteractiveUtils ...@@ -8,6 +8,7 @@ using InteractiveUtils
using Jacobi using Jacobi
@reexport using HDF5 @reexport using HDF5
using LinearAlgebra using LinearAlgebra
using LoopVectorization
import MacroTools import MacroTools
using OrderedCollections using OrderedCollections
using Plots using Plots
......
...@@ -8,12 +8,14 @@ function compute_rhs_weak_form!(rhs, f, nf, mesh::Mesh1d) ...@@ -8,12 +8,14 @@ function compute_rhs_weak_form!(rhs, f, nf, mesh::Mesh1d)
@unpack K = mesh @unpack K = mesh
shape = layout(mesh) shape = layout(mesh)
mat_rhs = vreshape(rhs, shape) mat_rhs = vreshape(rhs, shape)
mat_f = vreshape(f, shape) mat_f = vreshape(f, shape)
mul!(mat_rhs, MDM, mat_f) mul!(mat_rhs, MDM, mat_f)
nf_lhs = view(nf, 1:K) nf_lhs = view(nf, 1:K)
nf_rhs = view(nf, K+1:2*K) nf_rhs = view(nf, K+1:2*K)
@. mat_rhs -= (nf_rhs' * Ml_rhs + #= minus contained in normal vector =# nf_lhs' * Ml_lhs) @turbo @. begin # factor x10
mat_rhs .*= invjac mat_rhs -= (nf_rhs' * Ml_rhs + #= minus contained in normal vector =# nf_lhs' * Ml_lhs)
mat_rhs *= invjac
end
return return
end end
function compute_rhs_weak_form!(rhs, f, s, nf, mesh::Mesh1d) function compute_rhs_weak_form!(rhs, f, s, nf, mesh::Mesh1d)
...@@ -28,42 +30,48 @@ function compute_rhs_weak_form!(rhs, fx, fy, nf, mesh::Mesh2d) ...@@ -28,42 +30,48 @@ function compute_rhs_weak_form!(rhs, fx, fy, nf, mesh::Mesh2d)
@unpack invM, MDM, Ml_lhs, Ml_rhs, Npts = element @unpack invM, MDM, Ml_lhs, Ml_rhs, Npts = element
Ml_down, Ml_up = Ml_lhs, Ml_rhs Ml_down, Ml_up = Ml_lhs, Ml_rhs
Nx, Ny = Npts, Npts Nx, Ny = Npts, Npts
@inbounds for k = 1:n_cells(mesh.tree) Ncs = n_cells(mesh.tree)
@inbounds for kk in 1:Ncs
# bulk # bulk
idxs_bulk = cellindices(mesh, k) idxs_bulk = flatcellindices(mesh, kk)
v_rhs = view(rhs, idxs_bulk) v_rhs = rview(rhs, idxs_bulk, (Nx,Ny))
v_fx = view(fx, idxs_bulk) v_fx = rview(fx, idxs_bulk, (Nx,Ny))
v_fy = view(fy, idxs_bulk) v_fy = rview(fy, idxs_bulk, (Nx,Ny))
v_dxdX = view(dxdX, idxs_bulk) v_dxdX = rview(dxdX, idxs_bulk, (Nx,Ny))
v_dydY = view(dydY, idxs_bulk) v_dydY = rview(dydY, idxs_bulk, (Nx,Ny))
# faces # faces
idxs_lhs, idxs_rhs, idxs_down, idxs_up = faceindices(mesh, k) idxs_lhs, idxs_rhs, idxs_down, idxs_up = faceindices(mesh, kk)
v_nf_lhs = view(nf, idxs_lhs) v_nf_lhs = view(nf, idxs_lhs)
v_nf_rhs = view(nf, idxs_rhs) v_nf_rhs = view(nf, idxs_rhs)
v_nf_down = view(nf, idxs_down) v_nf_down = view(nf, idxs_down)
v_nf_up = view(nf, idxs_up) v_nf_up = view(nf, idxs_up)
# TODO Benchmark and see whether LoopVectorization can used here. # atm we can utilize that cartesian grids have constant jacobians and
@inbounds for j=1:Ny, i=1:Nx # pull those out of the loop
rhsij = 0 dxdXij = v_dxdX[1,1]
dxdXij = v_dxdX[i,j] dydYij = v_dydY[1,1]
dydYij = v_dydY[i,j]
for k = 1:Nx # notes on @turbo:
rhsij += MDM[i,k] * v_fx[k,j] # - can't use k as index for both inner loops, otherwise gives wrong result
# - can't insert x_rhsij *= dxdXij after the k loop,
# cf. https://github.com/JuliaSIMD/LoopVectorization.jl/issues/506
@turbo for j in 1:Ny
for i in 1:Nx
x_rhsij = 0.0
for k in 1:Nx
x_rhsij += MDM[i,k] * v_fx[k,j]
end
y_rhsij = 0.0
for l in 1:Ny
y_rhsij += MDM[j,l] * v_fy[i,l]
end
bdry_rhsij = (v_nf_rhs[j] * Ml_rhs[i] + v_nf_lhs[j] * Ml_lhs[i]) * dxdXij +
(v_nf_up[i] * Ml_up[j] + v_nf_down[i] * Ml_down[j]) * dydYij
v_rhs[i,j] = x_rhsij * dxdXij + y_rhsij * dydYij - bdry_rhsij
end end
v_rhs[i,j] = rhsij * dxdXij
rhsij = 0
for k = 1:Ny
rhsij += MDM[j,k] * v_fy[i,k]
end
v_rhs[i,j] += rhsij * dydYij
v_rhs[i,j] -= (v_nf_rhs[j] * Ml_rhs[i] + v_nf_lhs[j] * Ml_lhs[i]) * dxdXij +
(v_nf_up[i] * Ml_up[j] + v_nf_down[i] * Ml_down[j]) * dydYij
end end
end end
return return
end end
...@@ -73,29 +81,34 @@ function compute_rhs_weak_form!(rhs, fx, fy::Real, nf, mesh::Mesh2d) ...@@ -73,29 +81,34 @@ function compute_rhs_weak_form!(rhs, fx, fy::Real, nf, mesh::Mesh2d)
@unpack invM, MDM, Ml_lhs, Ml_rhs, Npts = element @unpack invM, MDM, Ml_lhs, Ml_rhs, Npts = element
Ml_down, Ml_up = Ml_lhs, Ml_rhs Ml_down, Ml_up = Ml_lhs, Ml_rhs
Nx, Ny = Npts, Npts Nx, Ny = Npts, Npts
@inbounds for k = 1:n_cells(mesh.tree) Ncs = n_cells(mesh.tree)
@inbounds for kk in 1:Ncs
# bulk # bulk
idxs_bulk = cellindices(mesh, k) idxs_bulk = flatcellindices(mesh, kk)
v_rhs = view(rhs, idxs_bulk) v_rhs = rview(rhs, idxs_bulk, (Nx,Ny))
v_fx = view(fx, idxs_bulk) v_fx = rview(fx, idxs_bulk, (Nx,Ny))
v_dxdX = view(dxdX, idxs_bulk) v_dxdX = rview(dxdX, idxs_bulk, (Nx,Ny))
# faces # faces
idxs_lhs, idxs_rhs, _, _ = faceindices(mesh, k) idxs_lhs, idxs_rhs, _, _ = faceindices(mesh, kk)
v_nf_lhs = view(nf, idxs_lhs) v_nf_lhs = view(nf, idxs_lhs)
v_nf_rhs = view(nf, idxs_rhs) v_nf_rhs = view(nf, idxs_rhs)
# TODO Benchmark and see whether LoopVectorization can used here. # atm we can utilize that cartesian grids have constant jacobians and
@inbounds for j=1:Ny, i=1:Nx # pull those out of the loop
rhsij = 0 dxdXij = v_dxdX[1,1]
dxdXij = v_dxdX[i,j]
for k = 1:Nx @turbo for j in 1:Ny
rhsij += MDM[i,k] * v_fx[k,j] for i in 1:Nx
x_rhsij = 0.0
for k in 1:Nx
x_rhsij += MDM[i,k] * v_fx[k,j]
end
bdry_rhsij = (v_nf_rhs[j] * Ml_rhs[i] + v_nf_lhs[j] * Ml_lhs[i]) * dxdXij
v_rhs[i,j] = x_rhsij * dxdXij - bdry_rhsij
end end
v_rhs[i,j] = rhsij * dxdXij
v_rhs[i,j] -= (v_nf_rhs[j] * Ml_rhs[i] + v_nf_lhs[j] * Ml_lhs[i]) * dxdXij
end end
end end
...@@ -107,29 +120,34 @@ function compute_rhs_weak_form!(rhs, fx::Real, fy, nf, mesh::Mesh2d) ...@@ -107,29 +120,34 @@ function compute_rhs_weak_form!(rhs, fx::Real, fy, nf, mesh::Mesh2d)
@unpack invM, MDM, Ml_lhs, Ml_rhs, Npts = element @unpack invM, MDM, Ml_lhs, Ml_rhs, Npts = element
Ml_down, Ml_up = Ml_lhs, Ml_rhs Ml_down, Ml_up = Ml_lhs, Ml_rhs
Nx, Ny = Npts, Npts Nx, Ny = Npts, Npts
@inbounds for k = 1:n_cells(mesh.tree) Ncs = n_cells(mesh.tree)
@inbounds for kk in 1:Ncs
# bulk # bulk
idxs_bulk = cellindices(mesh, k) idxs_bulk = flatcellindices(mesh, kk)
v_rhs = view(rhs, idxs_bulk) v_rhs = rview(rhs, idxs_bulk, (Nx,Ny))
v_fy = view(fy, idxs_bulk) v_fy = rview(fy, idxs_bulk, (Nx,Ny))
v_dydY = view(dydY, idxs_bulk) v_dydY = rview(dydY, idxs_bulk, (Nx,Ny))
# faces # faces
_, _, idxs_down, idxs_up = faceindices(mesh, k) _, _, idxs_down, idxs_up = faceindices(mesh, kk)
v_nf_down = view(nf, idxs_down) v_nf_down = view(nf, idxs_down)
v_nf_up = view(nf, idxs_up) v_nf_up = view(nf, idxs_up)
# TODO Benchmark and see whether LoopVectorization can used here. # atm we can utilize that cartesian grids have constant jacobians and
@inbounds for j=1:Ny, i=1:Nx # pull those out of the loop
rhsij = 0 dydYij = v_dydY[1,1]
dydYij = v_dydY[i,j]
for k = 1:Ny @turbo for j in 1:Ny
rhsij += MDM[j,k] * v_fy[i,k] for i in 1:Nx
y_rhsij = 0.0
for k in 1:Ny
y_rhsij += MDM[j,k] * v_fy[i,k]
end
bdry_rhsij = (v_nf_up[i] * Ml_up[j] + v_nf_down[i] * Ml_down[j]) * dydYij
v_rhs[i,j] = y_rhsij * dydYij - bdry_rhsij
end end
v_rhs[i,j] += rhsij * dydYij
v_rhs[i,j] -= (v_nf_up[i] * Ml_up[j] + v_nf_down[i] * Ml_down[j]) * dydYij
end end
end end
......
...@@ -509,6 +509,17 @@ end ...@@ -509,6 +509,17 @@ end
eachcell(mesh::AbstractMesh) = CellIterator(mesh) eachcell(mesh::AbstractMesh) = CellIterator(mesh)
eachcell(mesh::AbstractMesh, data) = CellDataIterator(mesh, data) eachcell(mesh::AbstractMesh, data) = CellDataIterator(mesh, data)
# Flat (linear) index range covering all nodes of cell `idx` of a 1d mesh.
# The range starts right after the cell's entry in `mesh.offsets` and spans
# `mesh.element.Npts` consecutive indices.
@inline function flatcellindices(mesh::Mesh1d, idx)
    npts = mesh.element.Npts
    start = mesh.offsets[idx]
    lo = start + 1
    hi = start + npts
    return lo:hi
end
# Flat (linear) index range covering all nodes of cell `idx` of a 2d mesh.
# The same number of points (`mesh.element.Npts`) is used in both directions,
# so each cell occupies `Npts * Npts` consecutive indices following the cell's
# entry in `mesh.offsets`.
@inline function flatcellindices(mesh::Mesh2d, idx)
    npts_x = mesh.element.Npts
    npts_y = npts_x
    start = mesh.offsets[idx]
    lo = start + 1
    hi = start + npts_x * npts_y
    return lo:hi
end
@inline function cellindices(mesh, idx) @inline function cellindices(mesh, idx)
Nx = mesh.element.Npts Nx = mesh.element.Npts
......
...@@ -564,6 +564,7 @@ end ...@@ -564,6 +564,7 @@ end
# reshape allocates, see https://github.com/JuliaLang/julia/issues/36313 # reshape allocates, see https://github.com/JuliaLang/julia/issues/36313
# however, Base allows to construct a custom reshape that does not allocate # however, Base allows to construct a custom reshape that does not allocate
@inline vreshape(x, sz::Tuple{N,Int64}) where N = reshape(view(x, :), sz) @inline vreshape(x, sz::Tuple{N,Int64}) where N = reshape(view(x, :), sz)
# Reshaped view: take the window `rng` out of `x` via `view` and present it
# with shape `sz` via `reshape`. For standard arrays the result aliases `x`,
# i.e. writes through the returned array modify `x`.
@inline function rview(x, rng, sz)
    window = view(x, rng)
    return reshape(window, sz)
end
TODO() = error("Not implemented yet!") TODO() = error("Not implemented yet!")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment