Java 大数据量读取

能有多快?

在一些算法竞赛中,可能会有一些题目数据量非常大,如果使用 Java 来编写代码,如何才能读取的更快呢?

正好在 SPOJ 这个平台上有一道很合适的题目:SPOJ.com - Problem INTEST

使用 java.util.Scanner

1
2
3
4
5
6
7
8
9
import java.util.Scanner;

class Main {
public static void main(String[] args) {
Scanner scanner = new Scanner(System.in);
int n = scanner.nextInt();
System.out.println(n);
}
}

使用 java.io.BufferedReader

使用 BufferedReader 配合 Integer.parseInt()Double.parseDouble()等方法。

1
2
3
4
5
6
7
8
9
10
import java.io.BufferedReader;
import java.io.InputStreamReader;

class Main {
public static void main(String[] args) throws Exception {
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(System.in));
int n = Integer.parseInt(bufferedReader.readLine());
System.out.println(n);
}
}

进阶版:自定义 parse

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;

class Reader {
final private int BUFFER_SIZE = 1 << 16;
private DataInputStream din;
private byte[] buffer;
private int bufferPointer, bytesRead;

public Reader() {
din = new DataInputStream(System.in);
buffer = new byte[BUFFER_SIZE];
bufferPointer = bytesRead = 0;
}

public Reader(String file_name) throws IOException {
din = new DataInputStream(new FileInputStream(file_name));
buffer = new byte[BUFFER_SIZE];
bufferPointer = bytesRead = 0;
}

public String readLine() throws IOException {
byte[] buf = new byte[64]; // line length
int cnt = 0;
int c;
while ((c = read()) != -1) {
if (c == '\n') {
break;
}
buf[cnt++] = (byte) c;
}
return new String(buf, 0, cnt);
}

public int nextInt() throws IOException {
int ret = 0;
byte c = read();
while (c <= ' ') {
c = read();
}
boolean neg = (c == '-');
if (neg) {
c = read();
}
do {
ret = ret * 10 + c - '0';
} while ((c = read()) >= '0' && c <= '9');

if (neg) {
return -ret;
}
return ret;
}

public long nextLong() throws IOException {
long ret = 0;
byte c = read();
while (c <= ' ') {
c = read();
}
boolean neg = (c == '-');
if (neg) {
c = read();
}
do {
ret = ret * 10 + c - '0';
}
while ((c = read()) >= '0' && c <= '9');
if (neg) {
return -ret;
}
return ret;
}

public double nextDouble() throws IOException {
double ret = 0;
double div = 1;
byte c = read();
while (c <= ' ') {
c = read();
}
boolean neg = (c == '-');
if (neg) {
c = read();
}

do {
ret = ret * 10 + c - '0';
}
while ((c = read()) >= '0' && c <= '9');

if (c == '.') {
while ((c = read()) >= '0' && c <= '9') {
ret += (c - '0') / (div *= 10);
}
}

if (neg) {
return -ret;
}
return ret;
}

private void fillBuffer() throws IOException {
bytesRead = din.read(buffer, bufferPointer = 0, BUFFER_SIZE);
if (bytesRead == -1) {
buffer[0] = -1;
}
}

private byte read() throws IOException {
if (bufferPointer == bytesRead) {
fillBuffer();
}
return buffer[bufferPointer++];
}

public void close() throws IOException {
if (din == null) {
return;
}
din.close();
}
}

速度对比

原因分析

为什么 java.util.Scanner 那么慢?

单行输入多个值的时候

比如

1
2
3
4
5
6
5
1 2
2 4
3 6
4 8
5 10

这个时候如果按照一行一行来读取数据的话,如果要把同一行的多个数据分离开,一种方法是使用 String.split() 方法,还有一种方法是使用 StringTokenizer

参考资料

  1. Fast I/O in Java in Competitive Programming - GeeksforGeeks
  2. Faster Input for Java
  3. Difference between Scanner and BufferReader Class in Java - GeeksforGeeks
  4. Performance of string tokenisation: String.split() and StringTokenizer compared