Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8292758: put support for UNSIGNED5 format into its own header file #10067

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
53 changes: 33 additions & 20 deletions src/hotspot/share/code/compressedStream.cpp
Expand Up @@ -27,36 +27,45 @@
#include "utilities/ostream.hpp"
#include "utilities/moveBits.hpp"

// 32-bit self-inverse encoding of float bits
// converts trailing zeroes (common in floats) to leading zeroes
inline juint CompressedStream::reverse_int(juint i) {
return reverse_bits(i);
}

jint CompressedReadStream::read_signed_int() {
return decode_sign(read_int());
return UNSIGNED5::decode_sign(read_int());
}

// Compressing floats is simple, because the only common pattern
// is trailing zeroes. (Compare leading sign bits on ints.)
// Since floats are left-justified, as opposed to right-justified
// ints, we can bit-reverse them in order to take advantage of int
// compression.

// compression. Since bit reversal converts trailing zeroes to
// leading zeroes, effect is better compression of those common
// 32-bit float values, such as integers or integers divided by
// powers of two, that have many trailing zeroes.
jfloat CompressedReadStream::read_float() {
int rf = read_int();
int f = reverse_int(rf);
int f = reverse_bits(rf);
return jfloat_cast(f);
}

// The treatment of doubles is similar. We could bit-reverse each
// entire 64-bit word, but it is almost as effective to bit-reverse
// the individual halves. Since we are going to encode them
// separately as 32-bit halves anyway, it seems slightly simpler
// to reverse after splitting, and when reading reverse each
// half before joining them together.
jdouble CompressedReadStream::read_double() {
jint rh = read_int();
jint rl = read_int();
jint h = reverse_int(rh);
jint l = reverse_int(rl);
jint h = reverse_bits(rh);
jint l = reverse_bits(rl);
return jdouble_cast(jlong_from(h, l));
}

// A 64-bit long is encoded into distinct 32-bit halves. This saves
// us from having to define a 64-bit encoding and is almost as
// effective. A modified LEB128 could encode longs into 9 bytes, and
// this technique maxes out at 10 bytes, so, if we didn't mind the
// extra complexity of another coding system, we could process 64-bit
// values as single units. But, the complexity does not seem
// worthwhile.
jlong CompressedReadStream::read_long() {
jint low = read_signed_int();
jint high = read_signed_int();
Expand All @@ -70,26 +79,30 @@ CompressedWriteStream::CompressedWriteStream(int initial_size) : CompressedStrea
}

void CompressedWriteStream::grow() {
u_char* _new_buffer = NEW_RESOURCE_ARRAY(u_char, _size * 2);
int nsize = _size * 2;
const int min_expansion = UNSIGNED5::MAX_LENGTH;
if (nsize < min_expansion*2)
nsize = min_expansion*2;
Copy link
Member

@dean-long dean-long Sep 2, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not clear if this is needed or just an optimization. Maybe add a comment. Also, using MAX2 might be clearer.

rose00 marked this conversation as resolved.
Show resolved Hide resolved
u_char* _new_buffer = NEW_RESOURCE_ARRAY(u_char, nsize);
memcpy(_new_buffer, _buffer, _position);
_buffer = _new_buffer;
_size = _size * 2;
_size = nsize;
}

void CompressedWriteStream::write_float(jfloat value) {
juint f = jint_cast(value);
juint rf = reverse_int(f);
assert(f == reverse_int(rf), "can re-read same bits");
juint rf = reverse_bits(f);
assert(f == reverse_bits(rf), "can re-read same bits");
write_int(rf);
}

void CompressedWriteStream::write_double(jdouble value) {
juint h = high(jlong_cast(value));
juint l = low( jlong_cast(value));
juint rh = reverse_int(h);
juint rl = reverse_int(l);
assert(h == reverse_int(rh), "can re-read same bits");
assert(l == reverse_int(rl), "can re-read same bits");
juint rh = reverse_bits(h);
juint rl = reverse_bits(l);
assert(h == reverse_bits(rh), "can re-read same bits");
assert(l == reverse_bits(rl), "can re-read same bits");
write_int(rh);
write_int(rl);
}
Expand Down
91 changes: 15 additions & 76 deletions src/hotspot/share/code/compressedStream.hpp
Expand Up @@ -26,6 +26,7 @@
#define SHARE_CODE_COMPRESSEDSTREAM_HPP

#include "memory/allocation.hpp"
#include "utilities/unsigned5.hpp"

// Simple interface for filing out and filing in basic types
// Used for writing out and reading in debugging information.
Expand All @@ -36,18 +37,6 @@ class CompressedStream : public ResourceObj {
u_char* _buffer;
int _position;

enum {
// Constants for UNSIGNED5 coding of Pack200
lg_H = 6, H = 1<<lg_H, // number of high codes (64)
L = (1<<BitsPerByte)-H, // number of low codes (192)
MAX_i = 4 // bytes are numbered in (0..4), max 5 bytes
};

// 32-bit one-to-one sign encoding taken from Pack200
// converts leading sign bits into leading zeroes with trailing sign bit
static juint encode_sign(jint value) { return (value << 1) ^ (value >> 31); }
static jint decode_sign(juint value) { return (value >> 1) ^ -(jint)(value & 1); }
static juint reverse_int(juint i); // to trim trailing float 0's
public:
CompressedStream(u_char* buffer, int position = 0) {
_buffer = buffer;
Expand All @@ -66,41 +55,6 @@ class CompressedReadStream : public CompressedStream {
private:
inline u_char read() { return _buffer[_position++]; }

// This encoding, called UNSIGNED5, is taken from J2SE Pack200.
// It assumes that most values have lots of leading zeroes.
// Very small values, in the range [0..191], code in one byte.
// Any 32-bit value (including negatives) can be coded, in
// up to five bytes. The grammar is:
// low_byte = [0..191]
// high_byte = [192..255]
// any_byte = low_byte | high_byte
// coding = low_byte
// | high_byte low_byte
// | high_byte high_byte low_byte
// | high_byte high_byte high_byte low_byte
// | high_byte high_byte high_byte high_byte any_byte
// Each high_byte contributes six bits of payload.
// The encoding is one-to-one (except for integer overflow)
// and easy to parse and unparse.

jint read_int_mb(jint b0) {
int pos = position() - 1;
u_char* buf = buffer() + pos;
assert(buf[0] == b0 && b0 >= L, "correctly called");
jint sum = b0;
// must collect more bytes: b[1]...b[4]
int lg_H_i = lg_H;
for (int i = 0; ; ) {
jint b_i = buf[++i]; // b_i = read(); ++i;
sum += b_i << lg_H_i; // sum += b[i]*(64**i)
if (b_i < L || i == MAX_i) {
set_position(pos+i+1);
return sum;
}
lg_H_i += lg_H;
}
}

public:
CompressedReadStream(u_char* buffer, int position = 0)
: CompressedStream(buffer, position) {}
Expand All @@ -109,14 +63,14 @@ class CompressedReadStream : public CompressedStream {
jbyte read_byte() { return (jbyte ) read(); }
jchar read_char() { return (jchar ) read_int(); }
jshort read_short() { return (jshort ) read_signed_int(); }
jint read_int() { jint b0 = read();
if (b0 < L) return b0;
else return read_int_mb(b0);
}
jint read_signed_int();
jfloat read_float(); // jfloat_cast(reverse_int(read_int()))
jdouble read_double(); // jdouble_cast(2*reverse_int(read_int))
jfloat read_float(); // jfloat_cast(reverse_bits(read_int()))
jdouble read_double(); // jdouble_cast(2*reverse_bits(read_int))
jlong read_long(); // jlong_from(2*read_signed_int())

jint read_int() {
return UNSIGNED5::read_uint(_buffer, _position, 0);
}
};


Expand All @@ -134,23 +88,6 @@ class CompressedWriteStream : public CompressedStream {
}
void grow();

// UNSIGNED5 coding, 1-5 byte cases
void write_int_mb(jint value) {
juint sum = value;
for (int i = 0; ; ) {
if (sum < L || i == MAX_i) {
// remainder is either a "low code" or the 5th byte
assert(sum == (u_char)sum, "valid byte");
write((u_char)sum);
break;
}
sum -= L;
int b_i = L + (sum % H); // this is a "high code"
sum >>= lg_H; // extracted 6 bits
write(b_i); ++i;
}
}

protected:
int _size;

Expand All @@ -163,13 +100,15 @@ class CompressedWriteStream : public CompressedStream {
void write_byte(jbyte value) { write(value); }
void write_char(jchar value) { write_int(value); }
void write_short(jshort value) { write_signed_int(value); }
void write_int(jint value) { if ((juint)value < L && !full())
store((u_char)value);
else write_int_mb(value); }
void write_signed_int(jint value) { write_int(encode_sign(value)); }
void write_float(jfloat value); // write_int(reverse_int(jint_cast(v)))
void write_double(jdouble value); // write_int(reverse_int(<low,high>))
void write_signed_int(jint value) { write_int(UNSIGNED5::encode_sign(value)); }
void write_float(jfloat value); // write_int(reverse_bits(jint_cast(v)))
void write_double(jdouble value); // write_int(reverse_bits(<low,high>))
void write_long(jlong value); // write_signed_int(<low,high>)

void write_int(juint value) {
UNSIGNED5::write_uint_grow(value, _buffer, _position, _size,
[&](int){ grow(); });
}
};

#endif // SHARE_CODE_COMPRESSEDSTREAM_HPP
44 changes: 44 additions & 0 deletions src/hotspot/share/utilities/debug.cpp
Expand Up @@ -61,6 +61,7 @@
#include "utilities/formatBuffer.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/macros.hpp"
#include "utilities/unsigned5.hpp"
#include "utilities/vmError.hpp"

#include <stdio.h>
Expand Down Expand Up @@ -648,6 +649,49 @@ extern "C" JNIEXPORT void findbcp(intptr_t method, intptr_t bcp) {
}
}

// Debugger helper: check and decode the single UNSIGNED5 (u5) value whose
// encoding starts at address addr.
//   addr    - address of the first encoded byte
//   returns - the decoded 32-bit value, or 0 when UNSIGNED5::check_length
//             rejects the bytes at addr; a 0 result is therefore ambiguous
//             with a value that legitimately decodes to zero
// The limit handed to UNSIGNED5 is UNSIGNED5::MAX_LENGTH bytes past addr.
extern "C" JNIEXPORT u4 u5decode(intptr_t addr) {
  Command c("u5decode");
  u1* arr = (u1*)addr;
  // use the named maximum encoding length instead of a bare literal 5
  size_t off = 0, lim = UNSIGNED5::MAX_LENGTH;
  if (!UNSIGNED5::check_length(arr, off, lim)) {
    return 0;
  }
  return UNSIGNED5::read_uint(arr, off, lim);
}

// Debugger helper: check, decode and print a series of UNSIGNED5 (u5) values
// starting at address addr.
//   addr    - address of the first encoded byte
//   limit   - if non-zero, an address to stop before; a limit at or below
//             addr describes an empty range and nothing is decoded
//             if zero, the scan has no address bound
//   count   - if non-negative, stop once this many items have been printed
//             if negative, a zero byte is printed as "null" and consumed;
//             with no limit the scan then stops there (like strlen), while
//             with an explicit limit it keeps going up to that limit
//   returns - the address just after the last byte consumed
// The scan also stops at the first position UNSIGNED5::check_length rejects.
extern "C" JNIEXPORT intptr_t u5p(intptr_t addr, intptr_t limit, int count) {
  Command c("u5p");
  u1* arr = (u1*)addr;
  // A zero limit is the "no limit" marker, so it must never be produced by
  // subtracting addresses; track boundedness separately.
  const bool bounded = (limit != 0);
  if (bounded && limit < addr)  limit = addr;
  const size_t lim = bounded ? (size_t)(limit - addr) : 0;
  size_t off = 0;
  int printed = 0;
  tty->print("U5: [");
  for (;;) {
    if (count >= 0 && printed >= count)  break;
    // never touch memory at or beyond an explicit limit
    if (bounded && off >= lim)  break;
    if (count < 0 && arr[off] == 0) {
      tty->print(" null");
      ++off;
      ++printed;
      if (bounded)  continue;  // the explicit limit still terminates the scan
      break;                   // unbounded: the null is the terminator
    }
    if (!UNSIGNED5::check_length(arr, off, lim)) {
      break;
    }
    u4 value = UNSIGNED5::read_uint(arr, off, lim);
    tty->print(" %u", value);  // u4 is unsigned; %d would show large values as negative
    ++printed;
  }
  tty->print_cr(" ] (values=%d/length=%d)",
                printed, (int)off);
  return addr + off;
}


// int versions of all methods to avoid having to type type casts in the debugger

void pp(intptr_t p) { pp((void*)p); }
Expand Down
39 changes: 39 additions & 0 deletions src/hotspot/share/utilities/unsigned5.cpp
@@ -0,0 +1,39 @@
/*
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

#include "precompiled.hpp"
#include "memory/allocation.hpp"
#include "utilities/unsigned5.hpp"

// Explicit instantiation for supported types.

// Shorthand for the accessor type used with a u_char* array and int offsets.
// NOTE(review): presumably ArrayGetSet supplies the per-element get/set used
// by the templates below -- confirm against unsigned5.hpp.
using AGS = UNSIGNED5::ArrayGetSet<u_char*,int>;

// Instantiate the reader, the writer and the length checker for the
// (u_char*, int) combination, i.e. the array and offset types that
// CompressedStream keeps in _buffer and _position.
template u4 UNSIGNED5::read_uint(u_char* array, int& offset_rw, int limit, AGS);
template void UNSIGNED5::write_uint(uint32_t value, u_char* array, int& offset_rw, int limit, AGS);
template int UNSIGNED5::check_length(u_char* array, int offset, int limit, AGS);
rose00 marked this conversation as resolved.
Show resolved Hide resolved

//template uint32_t UNSIGNED5::read_uint(address array, size_t& offset_rw, size_t limit, AGS);
//template void UNSIGNED5::write_uint(uint32_t value, address array, size_t& offset_rw, size_t limit, AGS);
//template int UNSIGNED5::check_length(address array, size_t offset, size_t limit, AGS);