Skip to content

Commit ac86f59

Browse files
iklam and John R Rose committed May 9, 2024
8330532: Improve line-oriented text parsing in HotSpot
Co-authored-by: John R Rose <jrose@openjdk.org>
Reviewed-by: matsaave, jsjolen
1 parent 964d608 commit ac86f59

File tree

11 files changed

+1266
-157
lines changed

11 files changed

+1266
-157
lines changed
 

‎src/hotspot/share/cds/classListParser.cpp

+72-57
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2015, 2024, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -50,30 +50,24 @@
5050
#include "runtime/javaCalls.hpp"
5151
#include "utilities/defaultStream.hpp"
5252
#include "utilities/macros.hpp"
53+
#include "utilities/utf8.hpp"
5354

5455
volatile Thread* ClassListParser::_parsing_thread = nullptr;
5556
ClassListParser* ClassListParser::_instance = nullptr;
5657

57-
ClassListParser::ClassListParser(const char* file, ParseMode parse_mode) : _id2klass_table(INITIAL_TABLE_SIZE, MAX_TABLE_SIZE) {
58+
ClassListParser::ClassListParser(const char* file, ParseMode parse_mode) :
59+
_classlist_file(file),
60+
_id2klass_table(INITIAL_TABLE_SIZE, MAX_TABLE_SIZE),
61+
_file_input(do_open(file), /* need_close=*/true),
62+
_input_stream(&_file_input) {
5863
log_info(cds)("Parsing %s%s", file,
5964
(parse_mode == _parse_lambda_forms_invokers_only) ? " (lambda form invokers only)" : "");
60-
_classlist_file = file;
61-
_file = nullptr;
62-
// Use os::open() because neither fopen() nor os::fopen()
63-
// can handle long path name on Windows.
64-
int fd = os::open(file, O_RDONLY, S_IREAD);
65-
if (fd != -1) {
66-
// Obtain a File* from the file descriptor so that fgets()
67-
// can be used in parse_one_line()
68-
_file = os::fdopen(fd, "r");
69-
}
70-
if (_file == nullptr) {
65+
if (!_file_input.is_open()) {
7166
char errmsg[JVM_MAXPATHLEN];
7267
os::lasterror(errmsg, JVM_MAXPATHLEN);
7368
vm_exit_during_initialization("Loading classlist failed", errmsg);
7469
}
75-
_line_no = 0;
76-
_token = _line;
70+
_token = _line = nullptr;
7771
_interfaces = new (mtClass) GrowableArray<int>(10, mtClass);
7872
_indy_items = new (mtClass) GrowableArray<const char*>(9, mtClass);
7973
_parse_mode = parse_mode;
@@ -84,14 +78,24 @@ ClassListParser::ClassListParser(const char* file, ParseMode parse_mode) : _id2k
8478
Atomic::store(&_parsing_thread, Thread::current());
8579
}
8680

81+
// Opens the class list file `file` read-only and wraps the resulting file
// descriptor in a stdio stream.
//
// Returns the FILE* obtained from os::fdopen(), or nullptr when os::open()
// fails (fd == -1). The function itself reports no error; the caller is
// expected to check the result.
//
// NOTE(review): if os::open() succeeds but os::fdopen() returns nullptr,
// nothing in this function closes `fd` -- confirm whether that is acceptable.
FILE* ClassListParser::do_open(const char* file) {
82+
// Use os::open() because neither fopen() nor os::fopen()
83+
// can handle long path name on Windows. (See JDK-8216184)
84+
int fd = os::open(file, O_RDONLY, S_IREAD);
85+
FILE* fp = nullptr;
86+
if (fd != -1) {
87+
// Obtain a FILE* from the file descriptor so that _input_stream
88+
// can be used in ClassListParser::parse()
89+
fp = os::fdopen(fd, "r");
90+
}
91+
return fp;
92+
}
93+
8794
// Returns true when the calling thread is the thread currently recorded in
// _parsing_thread (read with Atomic::load and compared to Thread::current()).
bool ClassListParser::is_parsing_thread() {
8895
return Atomic::load(&_parsing_thread) == Thread::current();
8996
}
9097

9198
ClassListParser::~ClassListParser() {
92-
if (_file != nullptr) {
93-
fclose(_file);
94-
}
9599
Atomic::store(&_parsing_thread, (Thread*)nullptr);
96100
delete _indy_items;
97101
delete _interfaces;
@@ -101,7 +105,15 @@ ClassListParser::~ClassListParser() {
101105
int ClassListParser::parse(TRAPS) {
102106
int class_count = 0;
103107

104-
while (parse_one_line()) {
108+
for (; !_input_stream.done(); _input_stream.next()) {
109+
_line = _input_stream.current_line();
110+
if (*_line == '#') { // comment
111+
continue;
112+
}
113+
if (!parse_one_line()) {
114+
break;
115+
}
116+
105117
if (lambda_form_line()) {
106118
// The current line is "@lambda-form-invoker ...". It has been recorded in LambdaFormInvokers,
107119
// and will be processed later.
@@ -112,6 +124,7 @@ int ClassListParser::parse(TRAPS) {
112124
continue;
113125
}
114126

127+
check_class_name(_class_name);
115128
TempNewSymbol class_name_symbol = SymbolTable::new_symbol(_class_name);
116129
if (_indy_items->length() > 0) {
117130
// The current line is "@lambda-proxy class_name". Load the proxy class.
@@ -165,43 +178,26 @@ int ClassListParser::parse(TRAPS) {
165178
}
166179

167180
bool ClassListParser::parse_one_line() {
168-
for (;;) {
169-
if (fgets(_line, sizeof(_line), _file) == nullptr) {
170-
return false;
171-
}
172-
++ _line_no;
173-
_line_len = (int)strlen(_line);
174-
if (_line_len > _max_allowed_line_len) {
175-
error("input line too long (must be no longer than %d chars)", _max_allowed_line_len);
176-
}
177-
if (*_line == '#') { // comment
178-
continue;
179-
}
180-
181-
{
182-
int len = (int)strlen(_line);
183-
int i;
184-
// Replace \t\r\n\f with ' '
185-
for (i=0; i<len; i++) {
186-
if (_line[i] == '\t' || _line[i] == '\r' || _line[i] == '\n' || _line[i] == '\f') {
187-
_line[i] = ' ';
188-
}
181+
{
182+
int len = (int)strlen(_line);
183+
int i;
184+
// Replace \t\r\n\f with ' '
185+
for (i=0; i<len; i++) {
186+
if (_line[i] == '\t' || _line[i] == '\r' || _line[i] == '\n' || _line[i] == '\f') {
187+
_line[i] = ' ';
189188
}
189+
}
190190

191-
// Remove trailing newline/space
192-
while (len > 0) {
193-
if (_line[len-1] == ' ') {
194-
_line[len-1] = '\0';
195-
len --;
196-
} else {
197-
break;
198-
}
191+
// Remove trailing newline/space
192+
while (len > 0) {
193+
if (_line[len-1] == ' ') {
194+
_line[len-1] = '\0';
195+
len --;
196+
} else {
197+
break;
199198
}
200-
_line_len = len;
201199
}
202-
203-
// valid line
204-
break;
200+
_line_len = len;
205201
}
206202

207203
_class_name = _line;
@@ -286,7 +282,7 @@ int ClassListParser::split_at_tag_from_line() {
286282
_token = _line;
287283
char* ptr;
288284
if ((ptr = strchr(_line, ' ')) == nullptr) {
289-
error("Too few items following the @ tag \"%s\" line #%d", _line, _line_no);
285+
error("Too few items following the @ tag \"%s\" line #%zu", _line, lineno());
290286
return 0;
291287
}
292288
*ptr++ = '\0';
@@ -304,7 +300,7 @@ bool ClassListParser::parse_at_tags() {
304300
if (strcmp(_token, LAMBDA_PROXY_TAG) == 0) {
305301
split_tokens_by_whitespace(offset);
306302
if (_indy_items->length() < 2) {
307-
error("Line with @ tag has too few items \"%s\" line #%d", _token, _line_no);
303+
error("Line with @ tag has too few items \"%s\" line #%zu", _token, lineno());
308304
return false;
309305
}
310306
// set the class name
@@ -315,7 +311,7 @@ bool ClassListParser::parse_at_tags() {
315311
_lambda_form_line = true;
316312
return true;
317313
} else {
318-
error("Invalid @ tag at the beginning of line \"%s\" line #%d", _token, _line_no);
314+
error("Invalid @ tag at the beginning of line \"%s\" line #%zu", _token, lineno());
319315
return false;
320316
}
321317
}
@@ -423,8 +419,8 @@ void ClassListParser::error(const char* msg, ...) {
423419
}
424420

425421
jio_fprintf(defaultStream::error_stream(),
426-
"An error has occurred while processing class list file %s %d:%d.\n",
427-
_classlist_file, _line_no, (error_index + 1));
422+
"An error has occurred while processing class list file %s %zu:%d.\n",
423+
_classlist_file, lineno(), (error_index + 1));
428424
jio_vfprintf(defaultStream::error_stream(), msg, ap);
429425

430426
if (_line_len <= 0) {
@@ -450,6 +446,25 @@ void ClassListParser::error(const char* msg, ...) {
450446
va_end(ap);
451447
}
452448

449+
// Validates `class_name` before it is used further.
//
// Two checks are made, in order:
//   1. strlen(class_name) must not exceed Symbol::max_length();
//   2. the bytes must pass UTF8::is_legal_utf8() (called with
//      version_leq_47 == false).
//
// If either check fails, a message naming _classlist_file and lineno() is
// written to defaultStream::error_stream() and
// vm_exit_during_initialization() is called. If both pass, the function
// returns without side effects.
void ClassListParser::check_class_name(const char* class_name) {
450+
const char* err = nullptr;
451+
size_t len = strlen(class_name);
452+
if (len > (size_t)Symbol::max_length()) {
453+
err = "class name too long";
454+
} else {
455+
// The length is narrowed to int for is_legal_utf8() below; the assert
// documents that this cast cannot truncate on this branch.
assert(Symbol::max_length() < INT_MAX && len < INT_MAX, "must be");
456+
if (!UTF8::is_legal_utf8((const unsigned char*)class_name, (int)len, /*version_leq_47*/false)) {
457+
err = "class name is not valid UTF8";
458+
}
459+
}
460+
if (err != nullptr) {
461+
jio_fprintf(defaultStream::error_stream(),
462+
"An error has occurred while processing class list file %s:%zu %s\n",
463+
_classlist_file, lineno(), err);
464+
vm_exit_during_initialization("class list format error.", nullptr);
465+
}
466+
}
467+
453468
// This function is used for loading classes for customized class loaders
454469
// during archive dumping.
455470
InstanceKlass* ClassListParser::load_class_from_source(Symbol* class_name, TRAPS) {

‎src/hotspot/share/cds/classListParser.hpp

+8-14
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2015, 2024, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,7 @@
2828
#include "utilities/exceptions.hpp"
2929
#include "utilities/globalDefinitions.hpp"
3030
#include "utilities/growableArray.hpp"
31+
#include "utilities/istream.hpp"
3132
#include "utilities/resizeableResourceHash.hpp"
3233

3334
#define LAMBDA_PROXY_TAG "@lambda-proxy"
@@ -80,14 +81,6 @@ class ClassListParser : public StackObj {
8081

8182
enum {
8283
_unspecified = -999,
83-
84-
// Max number of bytes allowed per line in the classlist.
85-
// Theoretically Java class names could be 65535 bytes in length. Also, an input line
86-
// could have a very long path name up to JVM_MAXPATHLEN bytes in length. In reality,
87-
// 4K bytes is more than enough.
88-
_max_allowed_line_len = 4096,
89-
_line_buf_extra = 10, // for detecting input too long
90-
_line_buf_size = _max_allowed_line_len + _line_buf_extra
9184
};
9285

9386
// Use a small initial size in debug build to test resizing logic
@@ -96,16 +89,14 @@ class ClassListParser : public StackObj {
9689
static volatile Thread* _parsing_thread; // the thread that created _instance
9790
static ClassListParser* _instance; // the singleton.
9891
const char* _classlist_file;
99-
FILE* _file;
10092

10193
ID2KlassTable _id2klass_table;
10294

103-
// The following field contains information from the *current* line being
104-
// parsed.
105-
char _line[_line_buf_size]; // The buffer that holds the current line. Some characters in
95+
FileInput _file_input;
96+
inputStream _input_stream;
97+
char* _line; // The buffer that holds the current line. Some characters in
10698
// the buffer may be overwritten by '\0' during parsing.
10799
int _line_len; // Original length of the input line.
108-
int _line_no; // Line number for current line being parsed
109100
const char* _class_name;
110101
GrowableArray<const char*>* _indy_items; // items related to invoke dynamic for archiving lambda proxy classes
111102
int _id;
@@ -132,6 +123,8 @@ class ClassListParser : public StackObj {
132123
bool parse_one_line();
133124
Klass* load_current_class(Symbol* class_name_symbol, TRAPS);
134125

126+
// Forwards to _input_stream.lineno(); the value is what the parser prints as
// the line number in its error messages.
size_t lineno() { return _input_stream.lineno(); }
127+
FILE* do_open(const char* file);
135128
ClassListParser(const char* file, ParseMode _parse_mode);
136129
~ClassListParser();
137130

@@ -183,6 +176,7 @@ class ClassListParser : public StackObj {
183176
error("%s id %d is not yet loaded", which, id);
184177
}
185178
}
179+
void check_class_name(const char* class_name);
186180

187181
const char* current_class_name() {
188182
return _class_name;

‎src/hotspot/share/compiler/compilerOracle.cpp

+16-41
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include "runtime/handles.inline.hpp"
4141
#include "runtime/jniHandles.hpp"
4242
#include "runtime/os.hpp"
43+
#include "utilities/istream.hpp"
4344
#include "utilities/parseInteger.hpp"
4445

4546
// Default compile commands, if defined, are parsed before any of the
@@ -1081,55 +1082,29 @@ bool CompilerOracle::parse_from_file() {
10811082
return true;
10821083
}
10831084

1084-
char token[1024];
1085-
int pos = 0;
1086-
int c = getc(stream);
1087-
bool success = true;
1088-
while(c != EOF && pos < (int)(sizeof(token)-1)) {
1089-
if (c == '\n') {
1090-
token[pos++] = '\0';
1091-
if (!parse_from_line(token)) {
1092-
success = false;
1093-
}
1094-
pos = 0;
1095-
} else {
1096-
token[pos++] = c;
1097-
}
1098-
c = getc(stream);
1099-
}
1100-
token[pos++] = '\0';
1101-
if (!parse_from_line(token)) {
1102-
success = false;
1103-
}
1104-
fclose(stream);
1105-
return success;
1085+
FileInput input(stream, /*need_close=*/ true);
1086+
return parse_from_input(&input, parse_from_line);
11061087
}
11071088

1108-
bool CompilerOracle::parse_from_string(const char* str, bool (*parse_line)(char*)) {
1109-
char token[1024];
1110-
int pos = 0;
1111-
const char* sp = str;
1112-
int c = *sp++;
1089+
// Feeds every line of `input` to the `parse_from_line` callback.
//
// An inputStream is constructed over `input` and iterated with
// done()/next(); current_line() is passed to the callback for each step.
// Iteration does not stop at the first failure: all lines are processed and
// the result is false if the callback returned false for any of them,
// true otherwise.
//
// (In this diff view, lines whose gutter marker ends in '-' are the removed
// body of the previous parse_from_string() implementation, not part of this
// function.)
bool CompilerOracle::parse_from_input(inputStream::Input* input,
1090+
CompilerOracle::
1091+
parse_from_line_fn_t* parse_from_line) {
11131092
bool success = true;
1114-
while (c != '\0' && pos < (int)(sizeof(token)-1)) {
1115-
if (c == '\n') {
1116-
token[pos++] = '\0';
1117-
if (!parse_line(token)) {
1118-
success = false;
1119-
}
1120-
pos = 0;
1121-
} else {
1122-
token[pos++] = c;
1093+
for (inputStream in(input); !in.done(); in.next()) {
1094+
if (!parse_from_line(in.current_line())) {
1095+
success = false;
11231096
}
1124-
c = *sp++;
1125-
}
1126-
token[pos++] = '\0';
1127-
if (!parse_line(token)) {
1128-
success = false;
11291097
}
11301098
return success;
11311099
}
11321100

1101+
// Parses the commands contained in the C string `str`.
//
// The string is wrapped in a MemoryInput covering strlen(str) bytes and
// handed to parse_from_input() together with the `parse_from_line` callback;
// the return value is whatever parse_from_input() returns.
// `str` must be a non-null, NUL-terminated string, since strlen() is applied
// to it unconditionally.
bool CompilerOracle::parse_from_string(const char* str,
1102+
CompilerOracle::
1103+
parse_from_line_fn_t* parse_from_line) {
1104+
MemoryInput input(str, strlen(str));
1105+
return parse_from_input(&input, parse_from_line);
1106+
}
1107+
11331108
bool compilerOracle_init() {
11341109
bool success = true;
11351110
// Register default compile commands first - any commands specified via CompileCommand will

0 commit comments

Comments
 (0)
Please sign in to comment.