Skip to content

Commit

Permalink
8292758: put support for UNSIGNED5 format into its own header file
Browse files Browse the repository at this point in the history
Reviewed-by: dlong, coleenp
  • Loading branch information
John R Rose committed Sep 8, 2022
1 parent 6677227 commit 8d3399b
Show file tree
Hide file tree
Showing 9 changed files with 1,129 additions and 157 deletions.
54 changes: 34 additions & 20 deletions src/hotspot/share/code/compressedStream.cpp
Expand Up @@ -27,36 +27,45 @@
#include "utilities/ostream.hpp"
#include "utilities/moveBits.hpp"

// Self-inverse 32-bit transform applied to the bit image of a float.
// Reversing the bits turns trailing zeroes (common in floats) into
// leading zeroes.
inline juint CompressedStream::reverse_int(juint i) {
  juint mirrored = reverse_bits(i);
  return mirrored;
}

// Reads one int with read_int() and maps it back to a signed value
// with UNSIGNED5::decode_sign, the inverse of the encode_sign step
// performed by write_signed_int.
jint CompressedReadStream::read_signed_int() {
  return UNSIGNED5::decode_sign(read_int());
}

// Compressing floats is simple, because the only common pattern
// is trailing zeroes.  (Compare leading sign bits on ints.)
// Since floats are left-justified, as opposed to right-justified
// ints, we can bit-reverse them in order to take advantage of int
// compression.  Since bit reversal converts trailing zeroes to
// leading zeroes, the effect is better compression of those common
// 32-bit float values, such as integers or integers divided by
// powers of two, that have many trailing zeroes.
jfloat CompressedReadStream::read_float() {
  int rf = read_int();        // bit-reversed float image, as written by write_float
  int f  = reverse_bits(rf);  // reversing again restores the original bits
  return jfloat_cast(f);
}

// The treatment of doubles is similar.  We could bit-reverse each
// entire 64-bit word, but it is almost as effective to bit-reverse
// the individual halves.  Since we are going to encode them
// separately as 32-bit halves anyway, it seems slightly simpler
// to reverse after splitting, and when reading reverse each
// half before joining them together.
jdouble CompressedReadStream::read_double() {
  // The high half is read first, matching the order used by write_double.
  jint rh = read_int();
  jint rl = read_int();
  jint h = reverse_bits(rh);
  jint l = reverse_bits(rl);
  return jdouble_cast(jlong_from(h, l));
}

// A 64-bit long is encoded into distinct 32-bit halves. This saves
// us from having to define a 64-bit encoding and is almost as
// effective. A modified LEB128 could encode longs into 9 bytes, and
// this technique maxes out at 10 bytes, so, if we didn't mind the
// extra complexity of another coding system, we could process 64-bit
// values as single units. But, the complexity does not seem
// worthwhile.
jlong CompressedReadStream::read_long() {
jint low = read_signed_int();
jint high = read_signed_int();
Expand All @@ -70,26 +79,31 @@ CompressedWriteStream::CompressedWriteStream(int initial_size) : CompressedStrea
}

void CompressedWriteStream::grow() {
u_char* _new_buffer = NEW_RESOURCE_ARRAY(u_char, _size * 2);
int nsize = _size * 2;
const int min_expansion = UNSIGNED5::MAX_LENGTH;
if (nsize < min_expansion*2) {
nsize = min_expansion*2;
}
u_char* _new_buffer = NEW_RESOURCE_ARRAY(u_char, nsize);
memcpy(_new_buffer, _buffer, _position);
_buffer = _new_buffer;
_size = _size * 2;
_size = nsize;
}

// Writes a float as the bit-reversed image of its 32 bits, so that
// trailing zero bits become leading zeroes before write_int encodes them.
void CompressedWriteStream::write_float(jfloat value) {
  juint f = jint_cast(value);
  juint rf = reverse_bits(f);
  // Bit reversal is self-inverse; this is what read_float relies on.
  assert(f == reverse_bits(rf), "can re-read same bits");
  write_int(rf);
}

// Writes a double as two separately bit-reversed 32-bit halves,
// high half first, which is the order read_double expects.
void CompressedWriteStream::write_double(jdouble value) {
  const jlong bits = jlong_cast(value);
  juint h = high(bits);
  juint l = low(bits);
  juint rh = reverse_bits(h);
  juint rl = reverse_bits(l);
  // Bit reversal is self-inverse; this is what read_double relies on.
  assert(h == reverse_bits(rh), "can re-read same bits");
  assert(l == reverse_bits(rl), "can re-read same bits");
  write_int(rh);
  write_int(rl);
}
Expand Down
91 changes: 15 additions & 76 deletions src/hotspot/share/code/compressedStream.hpp
Expand Up @@ -26,6 +26,7 @@
#define SHARE_CODE_COMPRESSEDSTREAM_HPP

#include "memory/allocation.hpp"
#include "utilities/unsigned5.hpp"

// Simple interface for filing out and filing in basic types
// Used for writing out and reading in debugging information.
Expand All @@ -36,18 +37,6 @@ class CompressedStream : public ResourceObj {
u_char* _buffer;
int _position;

// Parameters of the byte-oriented UNSIGNED5 coding: byte values below L
// are "low codes", byte values from L upward are "high codes", and a
// coded value occupies at most MAX_i+1 bytes.
enum {
// Constants for UNSIGNED5 coding of Pack200
lg_H = 6, H = 1<<lg_H, // number of high codes (64)
L = (1<<BitsPerByte)-H, // number of low codes (192)
MAX_i = 4 // bytes are numbered in (0..4), max 5 bytes
};

// 32-bit one-to-one sign encoding taken from Pack200
// converts leading sign bits into leading zeroes with trailing sign bit
// The left shift is performed on the unsigned image of the value, because
// left-shifting a negative signed integer is undefined before C++20.
static juint encode_sign(jint value) { return ((juint)value << 1) ^ (juint)(value >> 31); }
// Inverse of encode_sign: the low bit selects whether the remaining bits are complemented.
static jint decode_sign(juint value) { return (value >> 1) ^ -(jint)(value & 1); }
static juint reverse_int(juint i); // to trim trailing float 0's
public:
CompressedStream(u_char* buffer, int position = 0) {
_buffer = buffer;
Expand All @@ -66,41 +55,6 @@ class CompressedReadStream : public CompressedStream {
private:
// Fetches the byte at the current position, then advances the position by one.
inline u_char read() {
  const u_char current = _buffer[_position];
  ++_position;
  return current;
}

// This encoding, called UNSIGNED5, is taken from J2SE Pack200.
// It assumes that most values have lots of leading zeroes.
// Very small values, in the range [0..191], code in one byte.
// Any 32-bit value (including negatives) can be coded, in
// up to five bytes. The grammar is:
// low_byte = [0..191]
// high_byte = [192..255]
// any_byte = low_byte | high_byte
// coding = low_byte
// | high_byte low_byte
// | high_byte high_byte low_byte
// | high_byte high_byte high_byte low_byte
// | high_byte high_byte high_byte high_byte any_byte
// Each high_byte contributes six bits of payload.
// The encoding is one-to-one (except for integer overflow)
// and easy to parse and unparse.

// Multi-byte tail of read_int(): called after the first byte b0 has
// already been consumed and turned out to be a high code (b0 >= L).
// Accumulates the following bytes, each shifted by a further lg_H bits,
// until a low code (< L) or the byte at index MAX_i is reached, then
// moves the stream position past the last byte used and returns the sum.
jint read_int_mb(jint b0) {
// b0 was already consumed, so step back one byte to index from the start of the coding.
int pos = position() - 1;
u_char* buf = buffer() + pos;
assert(buf[0] == b0 && b0 >= L, "correctly called");
jint sum = b0;
// must collect more bytes: b[1]...b[4]
int lg_H_i = lg_H;
for (int i = 0; ; ) {
jint b_i = buf[++i]; // b_i = read(); ++i;
sum += b_i << lg_H_i; // sum += b[i]*(64**i)
// A low code ends the coding; the byte at index MAX_i ends it regardless of its value.
if (b_i < L || i == MAX_i) {
set_position(pos+i+1);
return sum;
}
lg_H_i += lg_H;
}
}

public:
// Creates a read stream over an existing byte buffer; buffer and position
// (default 0) are forwarded unchanged to the CompressedStream constructor.
CompressedReadStream(u_char* buffer, int position = 0)
: CompressedStream(buffer, position) {}
Expand All @@ -109,14 +63,14 @@ class CompressedReadStream : public CompressedStream {
// Typed readers.  read_byte() takes one raw byte; the other inline readers
// are built on read_int() or read_signed_int().
jbyte   read_byte()       { return (jbyte  ) read();            }
jchar   read_char()       { return (jchar  ) read_int();        }
jshort  read_short()      { return (jshort ) read_signed_int(); }
jint    read_signed_int();
jfloat  read_float();     // jfloat_cast(reverse_bits(read_int()))
jdouble read_double();    // jdouble_cast(2*reverse_bits(read_int))
jlong   read_long();      // jlong_from(2*read_signed_int())

// Reads one int through UNSIGNED5::read_uint, passing the stream's
// buffer and current position and a limit argument of 0.
jint read_int() {
  return UNSIGNED5::read_uint(_buffer, _position, 0);
}
};


Expand All @@ -134,23 +88,6 @@ class CompressedWriteStream : public CompressedStream {
}
void grow();

// UNSIGNED5 coding, 1-5 byte cases
// Writes value as a sequence of bytes: zero or more "high code" bytes
// (each L plus six payload bits) followed by one terminating byte.
// At most MAX_i high codes are written before the terminating byte.
void write_int_mb(jint value) {
// Work on the unsigned image so the comparisons and shifts below are unsigned.
juint sum = value;
for (int i = 0; ; ) {
if (sum < L || i == MAX_i) {
// remainder is either a "low code" or the 5th byte
assert(sum == (u_char)sum, "valid byte");
write((u_char)sum);
break;
}
// Remove the low-code range before extracting the next six payload bits.
sum -= L;
int b_i = L + (sum % H); // this is a "high code"
sum >>= lg_H; // extracted 6 bits
write(b_i); ++i;
}
}

protected:
int _size;

Expand All @@ -163,13 +100,15 @@ class CompressedWriteStream : public CompressedStream {
// Typed writers.  write_byte() stores one raw byte; the other inline
// writers are built on write_int() or write_signed_int().
void write_byte(jbyte value)           { write(value);                                }
void write_char(jchar value)           { write_int(value);                            }
void write_short(jshort value)         { write_signed_int(value);                     }
void write_signed_int(jint value)      { write_int(UNSIGNED5::encode_sign(value));    }
void write_float(jfloat value);        // write_int(reverse_bits(jint_cast(v)))
void write_double(jdouble value);      // write_int(reverse_bits(<low,high>))
void write_long(jlong value);          // write_signed_int(<low,high>)

// Writes one int through UNSIGNED5::write_uint_grow, passing the stream's
// buffer, position and size together with a callback that calls grow().
void write_int(juint value) {
  UNSIGNED5::write_uint_grow(value, _buffer, _position, _size,
                             [&](int){ grow(); });
}
};

#endif // SHARE_CODE_COMPRESSEDSTREAM_HPP
32 changes: 32 additions & 0 deletions src/hotspot/share/utilities/debug.cpp
Expand Up @@ -61,6 +61,7 @@
#include "utilities/formatBuffer.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/macros.hpp"
#include "utilities/unsigned5.hpp"
#include "utilities/vmError.hpp"

#include <stdio.h>
Expand Down Expand Up @@ -648,6 +649,37 @@ extern "C" JNIEXPORT void findbcp(intptr_t method, intptr_t bcp) {
}
}

// Debugger helper: validates and decodes one UNSIGNED5 value at addr.
// Returns 0 if UNSIGNED5::check_length rejects the bytes within a limit
// of 5; otherwise returns the result of UNSIGNED5::read_uint.
// NOTE(review): a rejected input and a decoded value of zero both
// yield 0, so the caller cannot tell them apart from the result alone.
extern "C" JNIEXPORT u4 u5decode(intptr_t addr) {
  Command c("u5decode");
  u1* bytes = (u1*)addr;
  size_t pos = 0;
  size_t limit = 5;
  if (UNSIGNED5::check_length(bytes, pos, limit)) {
    return UNSIGNED5::read_uint(bytes, pos, limit);
  }
  return 0;
}

// Debugger helper: prints up to count UNSIGNED5 items starting at addr
// and returns the address just past the last position reached.
// A limit of zero means no set limit; printing stops at the first null
// or after count items are printed.
// A count of zero or less is converted to -1, which means there is
// no limit on the count of items printed; printing then stops when
// a null is printed or at limit.
// See documentation for UNSIGNED5::Reader::print(count).
// NOTE(review): a non-zero limit at or below addr ends up as a
// length of zero, i.e. "no set limit" rather than "print nothing" --
// confirm that this is intended.
extern "C" JNIEXPORT intptr_t u5p(intptr_t addr,
                                  intptr_t limit,
                                  int count) {
  Command c("u5p");
  u1* bytes = (u1*)addr;
  if (limit != 0 && limit < addr) {
    limit = addr;
  }
  size_t span = 0;
  if (limit != 0) {
    span = limit - addr;
  }
  int max_items = (count > 0) ? count : -1;
  size_t end_offset = UNSIGNED5::print_count(max_items, bytes, (size_t)0, span);
  return addr + end_offset;
}


// int versions of all methods to avoid having to type type casts in the debugger

void pp(intptr_t p) { pp((void*)p); }
Expand Down
83 changes: 83 additions & 0 deletions src/hotspot/share/utilities/unsigned5.cpp
@@ -0,0 +1,83 @@
/*
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

#include "precompiled.hpp"
#include "memory/allocation.hpp"
#include "utilities/unsigned5.hpp"

// Most of UNSIGNED5 is in the header file.
// Let's put a few debug functions out-of-line here.

// For the record, UNSIGNED5 was defined around 2001 and was first
// published in the initial Pack200 spec. See:
// https://docs.oracle.com/en/java/javase/11/docs/specs/pack-spec.html
// in Section 6.1, "Encoding of Small Whole Numbers".

PRAGMA_DIAG_PUSH
PRAGMA_FORMAT_NONLITERAL_IGNORED

// For debugging, even in product builds (see debug.cpp).
//
// Prints items from this reader to st, bracketed by left and right.
//  - count >= 0 stops after that many items; a negative count puts
//    no bound on the number of items.
//  - When has_next() is false and the byte at the current position
//    is zero (and still before a non-zero _limit), the byte is printed
//    as " null" and skipped.  With a non-zero _limit printing then
//    continues; with no limit it stops there.
//  - left and right default to "U5: [" and "] (values=%d/length=%d)\n"
//    when passed as NULL.  right is used as a format string and is
//    given the number of items printed and the final position.
template<typename ARR, typename OFF, typename GET>
void UNSIGNED5::Reader<ARR,OFF,GET>::
print_on(outputStream* st, int count,
         const char* left,   // "U5: ["
         const char* right   // "] (values=%d/length=%d)\n"
         ) {
  if (left  == NULL)  left  = "U5: [";
  if (right == NULL)  right = "] (values=%d/length=%d)\n";
  int printed = 0;
  st->print("%s", left);
  for (;;) {
    if (count >= 0 && printed >= count)  break;
    if (!has_next()) {
      if ((_limit == 0 || _position < _limit) && _array[_position] == 0) {
        st->print(" null");
        ++_position;  // skip null byte
        ++printed;
        if (_limit != 0)  continue;  // keep going to explicit limit
      }
      break;
    }
    u4 value = next_uint();
    // The value is unsigned, so use %u; with %d any value of 2^31 or
    // more would be displayed as a negative number.
    if (printed == 0)
      st->print("%u", value);
    else
      st->print(" %u", value);
    ++printed;
  }
  st->print(right,
            // these arguments may or may not be used in the format string:
            printed,
            (int)_position);
}

PRAGMA_DIAG_POP

// Explicit instantiation for supported types.
// print_on is defined in this file, not in the header, so these
// instantiations provide its code for the listed ARR/OFF combinations
// (char*/int, u1*/int and address/size_t).
template void UNSIGNED5::Reader<char*,int>::
print_on(outputStream* st, int count, const char* left, const char* right);
template void UNSIGNED5::Reader<u1*,int>::
print_on(outputStream* st, int count, const char* left, const char* right);
template void UNSIGNED5::Reader<address,size_t>::
print_on(outputStream* st, int count, const char* left, const char* right);

1 comment on commit 8d3399b

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.