-
Notifications
You must be signed in to change notification settings - Fork 42
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
8290204: FP16 initial backend implementation #204
Closed
+729
−10
Closed
Changes from 1 commit
Commits
Show all changes
11 commits
Select commit
Hold shift + click to select a range
e418333
Initial fp16 backend implementation
smita-kamath 90ff380
Merge vectorIntrinsics+fp16
smita-kamath 6cb10da
Addressed review comments
smita-kamath 7677dbb
Updated leaf level assembly routines as per review comment
smita-kamath 831b266
Updated instructions as per review comment
smita-kamath 1ba2734
Updated cast instructs with new nodes
smita-kamath 60b9e15
typo in D2HF node
smita-kamath a19f869
Correction in HF2D node
smita-kamath 00df24e
32 bit build failure fix
smita-kamath 65a9e19
fix
smita-kamath 399b6d2
fix for build failures
smita-kamath File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1951,6 +1951,16 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType | |
return false; | ||
} | ||
break; | ||
case Op_AddVHF: | ||
case Op_SubVHF: | ||
case Op_MulVHF: | ||
case Op_DivVHF: | ||
case Op_AbsVHF: | ||
case Op_NegVHF: | ||
if (bt != T_SHORT && !VM_Version::supports_avx512_fp16()) { | ||
return false; | ||
} | ||
break; | ||
} | ||
return true; // Per default match rules are supported. | ||
} | ||
|
@@ -2109,16 +2119,6 @@ const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, Bas | |
if ((bt == T_INT || bt == T_LONG) && VM_Version::supports_avx512cd()) { | ||
return true; | ||
} | ||
case Op_AddVHF: | ||
case Op_SubVHF: | ||
case Op_MulVHF: | ||
case Op_DivVHF: | ||
case Op_AbsVHF: | ||
case Op_NegVHF: | ||
if(bt != T_SHORT && !VM_Version::supports_avx512_fp16()) { | ||
return false; | ||
} | ||
return true; | ||
|
||
default: | ||
return false; | ||
|
@@ -4848,50 +4848,16 @@ instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ | |
%} | ||
|
||
// =======================Half Float Reduction========================================== | ||
instruct reduction8HF(rRegI dst, vec src2, vec tmp, vec tmp1, vec tmp2) %{ | ||
predicate(UseAVX > 2); | ||
instruct reduction8HF(rRegI dst, vec src2, vec vtmp, vec vtmp1, vec vtmp2) %{ | ||
match(Set dst (AddReductionVHF dst src2)); | ||
effect(TEMP tmp, TEMP tmp1, TEMP tmp2); | ||
format %{"movdl $tmp1, $dst\n\t" | ||
"evaddsh $tmp1,$tmp1,$src2\n\t" | ||
"pshuflw $tmp,$src2,0x01\n\t" | ||
"evaddsh $tmp1,$tmp1,$tmp\n\t" | ||
"pshuflw $tmp,$src2,0x02\n\t" | ||
"evaddsh $tmp1,$tmp1,$tmp\n\t" | ||
"pshuflw $tmp,$src2,0x03\n\t" | ||
"evaddsh $tmp1,$tmp1,$tmp\n\t" | ||
"pshufd $tmp2,$src2, 0x0E\n\t" | ||
"evaddsh $tmp1,$tmp1,$tmp2\n\t" | ||
"pshuflw $tmp,$tmp2,0x01\n\t" | ||
"evaddsh $tmp1,$tmp1,$tmp\n\t" | ||
"pshuflw $tmp,$tmp2,0x02\n\t" | ||
"evaddsh $tmp1,$tmp1,$tmp\n\t" | ||
"pshuflw $tmp,$tmp2,0x03\n\t" | ||
"evaddsh $tmp1,$tmp1,$tmp\t" | ||
"movdl $dst, $tmp1\t! add reductionHF" %} | ||
ins_encode %{ | ||
__ movdl($tmp1$$XMMRegister, $dst$$Register); | ||
__ evaddsh($tmp1$$XMMRegister, $tmp1$$XMMRegister, $src2$$XMMRegister); | ||
__ pshuflw($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); | ||
__ evaddsh($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp$$XMMRegister); | ||
__ pshuflw($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); | ||
__ evaddsh($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp$$XMMRegister); | ||
__ pshuflw($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); | ||
__ evaddsh($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp$$XMMRegister); | ||
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x0E); | ||
__ evaddsh($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister); | ||
__ pshuflw($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); | ||
__ evaddsh($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp$$XMMRegister); | ||
__ pshuflw($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); | ||
__ evaddsh($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp$$XMMRegister); | ||
__ pshuflw($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); | ||
__ evaddsh($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp$$XMMRegister); | ||
__ movdl($dst$$Register, $tmp1$$XMMRegister); | ||
effect(TEMP vtmp, TEMP vtmp1, TEMP vtmp2); | ||
format %{ "reduction_halffloat $dst, $src2\t using $vtmp, $vtmp1, $vtmp2 as TEMP" %} | ||
ins_encode %{ | ||
__ reduce8HF($dst$$Register, $src2$$XMMRegister, $vtmp$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); | ||
%} | ||
ins_pipe( pipe_slow ); | ||
%} | ||
|
||
|
||
// =======================Double Reduction========================================== | ||
|
||
instruct reduction2D(regD dst, vec src, vec vtmp) %{ | ||
|
@@ -5408,7 +5374,6 @@ instruct vaddD_mem(vec dst, vec src, memory mem) %{ | |
|
||
// Halffloat vector add | ||
instruct vaddHF_reg(vec dst, vec src1, vec src2) %{ | ||
predicate(UseAVX > 2); | ||
match(Set dst (AddVHF src1 src2)); | ||
format %{ "evaddph $dst,$src1,$src2\t! add packedHF" %} | ||
ins_encode %{ | ||
|
@@ -5628,7 +5593,6 @@ instruct vsubD_mem(vec dst, vec src, memory mem) %{ | |
|
||
// Halffloat vector sub | ||
instruct vsubHF_reg(vec dst, vec src1, vec src2) %{ | ||
predicate(UseAVX > 2); | ||
match(Set dst (SubVHF src1 src2)); | ||
format %{ "evsubph $dst,$src1,$src2\t! sub packedHF" %} | ||
ins_encode %{ | ||
|
@@ -5969,9 +5933,8 @@ instruct vmulD_mem(vec dst, vec src, memory mem) %{ | |
|
||
//Halffloat vector mul | ||
instruct vmulHF_reg(vec dst, vec src1, vec src2) %{ | ||
predicate(UseAVX > 2); | ||
match(Set dst (MulVHF src1 src2)); | ||
format %{ "vmulph $dst,$src1,$src2\t! mul packedHF" %} | ||
format %{ "evmulph $dst,$src1,$src2\t! mul packedHF" %} | ||
ins_encode %{ | ||
int vlen_enc = vector_length_encoding(this); | ||
__ evmulph($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); | ||
|
@@ -6087,7 +6050,6 @@ instruct vdivD_mem(vec dst, vec src, memory mem) %{ | |
|
||
// Halffloat vector div | ||
instruct vdivHF_reg(vec dst, vec src1, vec src2) %{ | ||
predicate(UseAVX > 2); | ||
match(Set dst (DivVHF src1 src2)); | ||
format %{ "evdivph $dst,$src1,$src2\t! div packedHF" %} | ||
ins_encode %{ | ||
|
@@ -7987,7 +7949,6 @@ instruct vabsL_reg(vec dst, vec src) %{ | |
|
||
// Half float abs | ||
instruct vabsHF_reg(vec dst, vec src) %{ | ||
predicate(UseAVX > 2); | ||
match(Set dst (AbsVHF src)); | ||
format %{ "vandps $dst,$src\t# $dst = |$src| abs packedHF" %} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Incorrect format string. |
||
ins_encode %{ | ||
|
@@ -7999,7 +7960,6 @@ instruct vabsHF_reg(vec dst, vec src) %{ | |
|
||
// Halffloat neg | ||
instruct vnegHF_reg(vec dst, vec src) %{ | ||
predicate(UseAVX > 2); | ||
match(Set dst (NegVHF src)); | ||
format %{"vxorps $dst, $src\t $dst = -$src neg packedHF" %} | ||
ins_encode %{ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What if the target does not support AVX512BW and the vector length is 512 bits? The vector size compatibility check will then fail upfront and we won't reach this point; currently the BasicType for halffloat is T_SHORT:
https://github.com/openjdk/panama-vector/pull/204/files#diff-d6a3624f0f0af65a98a47378a5c146eed5016ca09b4de1acd0a3acc823242e82L1690
However, this may not show up in practice, since the FP16 ISA arrives with next-gen Xeons, which support both AVX512_FP16 and AVX512BW.