/src/bloaty/src/webassembly.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2018 Google Inc. All Rights Reserved. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #include "bloaty.h" |
16 | | #include "util.h" |
17 | | |
18 | | #include "absl/strings/substitute.h" |
19 | | |
20 | | using absl::string_view; |
21 | | |
22 | | namespace bloaty { |
23 | | namespace wasm { |
24 | | |
25 | 1.91M | uint64_t ReadLEB128Internal(bool is_signed, size_t size, string_view* data) { |
26 | 1.91M | uint64_t ret = 0; |
27 | 1.91M | int shift = 0; |
28 | 1.91M | int maxshift = 70; |
29 | 1.91M | const char* ptr = data->data(); |
30 | 1.91M | const char* limit = ptr + data->size(); |
31 | | |
32 | 1.92M | while (ptr < limit && shift < maxshift) { |
33 | 1.92M | char byte = *(ptr++); |
34 | 1.92M | ret |= static_cast<uint64_t>(byte & 0x7f) << shift; |
35 | 1.92M | shift += 7; |
36 | 1.92M | if ((byte & 0x80) == 0) { |
37 | 1.91M | data->remove_prefix(ptr - data->data()); |
38 | 1.91M | if (is_signed && shift < size && (byte & 0x40)) { |
39 | 0 | ret |= -(1ULL << shift); |
40 | 0 | } |
41 | 1.91M | return ret; |
42 | 1.91M | } |
43 | 1.92M | } |
44 | | |
45 | 1.91M | THROW("corrupt wasm data, unterminated LEB128"); |
46 | 1.91M | } |
47 | | |
48 | 679 | bool ReadVarUInt1(string_view* data) { |
49 | 679 | return static_cast<bool>(ReadLEB128Internal(false, 1, data)); |
50 | 679 | } |
51 | | |
52 | 925k | uint8_t ReadVarUInt7(string_view* data) { |
53 | 925k | return static_cast<char>(ReadLEB128Internal(false, 7, data)); |
54 | 925k | } |
55 | | |
56 | 987k | uint32_t ReadVarUInt32(string_view* data) { |
57 | 987k | return static_cast<uint32_t>(ReadLEB128Internal(false, 32, data)); |
58 | 987k | } |
59 | | |
60 | 368 | int8_t ReadVarint7(string_view* data) { |
61 | 368 | return static_cast<int8_t>(ReadLEB128Internal(true, 7, data)); |
62 | 368 | } |
63 | | |
64 | 1.90M | string_view ReadPiece(size_t bytes, string_view* data) { |
65 | 1.90M | if(data->size() < bytes) { |
66 | 572 | THROW("premature EOF reading variable-length DWARF data"); |
67 | 572 | } |
68 | 1.90M | string_view ret = data->substr(0, bytes); |
69 | 1.90M | data->remove_prefix(bytes); |
70 | 1.90M | return ret; |
71 | 1.90M | } |
72 | | |
73 | 37.3k | bool ReadMagic(string_view* data) { |
74 | 37.3k | const uint32_t wasm_magic = 0x6d736100; |
75 | 37.3k | auto magic = ReadFixed<uint32_t>(data); |
76 | | |
77 | 37.3k | if (magic != wasm_magic) { |
78 | 6.75k | return false; |
79 | 6.75k | } |
80 | | |
81 | | // TODO(haberman): do we need to fail if this is >1? |
82 | 30.5k | auto version = ReadFixed<uint32_t>(data); |
83 | 30.5k | (void)version; |
84 | | |
85 | 30.5k | return true; |
86 | 37.3k | } |
87 | | |
88 | | class Section { |
89 | | public: |
90 | | uint32_t id; |
91 | | std::string name; |
92 | | string_view data; |
93 | | string_view contents; |
94 | | |
95 | 925k | static Section Read(string_view* data_param) { |
96 | 925k | Section ret; |
97 | 925k | string_view data = *data_param; |
98 | 925k | string_view section_data = data; |
99 | | |
100 | 925k | ret.id = ReadVarUInt7(&data); |
101 | 925k | uint32_t size = ReadVarUInt32(&data); |
102 | 925k | ret.contents = ReadPiece(size, &data); |
103 | 925k | size_t header_size = ret.contents.data() - section_data.data(); |
104 | 925k | ret.data = ReadPiece(size + header_size, §ion_data); |
105 | | |
106 | 925k | if (ret.id == 0) { |
107 | 54.6k | uint32_t name_len = ReadVarUInt32(&ret.contents); |
108 | 54.6k | ret.name = std::string(ReadPiece(name_len, &ret.contents)); |
109 | 870k | } else if (ret.id <= 13) { |
110 | 870k | ret.name = names[ret.id]; |
111 | 870k | } else { |
112 | 570 | THROWF("Unknown section id: $0", ret.id); |
113 | 570 | } |
114 | | |
115 | 925k | *data_param = data; |
116 | 925k | return ret; |
117 | 925k | } |
118 | | |
119 | | enum Name { |
120 | | kType = 1, |
121 | | kImport = 2, |
122 | | kFunction = 3, |
123 | | kTable = 4, |
124 | | kMemory = 5, |
125 | | kGlobal = 6, |
126 | | kExport = 7, |
127 | | kStart = 8, |
128 | | kElement = 9, |
129 | | kCode = 10, |
130 | | kData = 11, |
131 | | kDataCount = 12, |
132 | | kEvent = 13, |
133 | | }; |
134 | | |
135 | | static const char* names[]; |
136 | | }; |
137 | | |
138 | | const char* Section::names[] = { |
139 | | "<none>", // 0 |
140 | | "Type", // 1 |
141 | | "Import", // 2 |
142 | | "Function", // 3 |
143 | | "Table", // 4 |
144 | | "Memory", // 5 |
145 | | "Global", // 6 |
146 | | "Export", // 7 |
147 | | "Start", // 8 |
148 | | "Element", // 9 |
149 | | "Code", // 10 |
150 | | "Data", // 11 |
151 | | "DataCount", // 12 |
152 | | "Event", // 13 |
153 | | }; |
154 | | |
// Import kind tags, compared against the kind byte read for each import entry
// in GetNumFunctionImports.
struct ExternalKind {
  enum Kind {
    kFunction = 0,  // followed by a varuint32
    kTable    = 1,  // followed by a table type
    kMemory   = 2,  // followed by a memory type
    kGlobal   = 3,  // followed by a global type
  };
};
163 | | |
164 | | template <class Func> |
165 | 17.7k | void ForEachSection(string_view file, Func&& section_func) { |
166 | 17.7k | string_view data = file; |
167 | 17.7k | ReadMagic(&data); |
168 | | |
169 | 943k | while (!data.empty()) { |
170 | 925k | Section section = Section::Read(&data); |
171 | 925k | section_func(section); |
172 | 925k | } |
173 | 17.7k | } webassembly.cc:void bloaty::wasm::ForEachSection<bloaty::wasm::ParseSections(bloaty::RangeSink*)::$_0>(std::__1::basic_string_view<char, std::__1::char_traits<char> >, bloaty::wasm::ParseSections(bloaty::RangeSink*)::$_0&&) Line | Count | Source | 165 | 8.26k | void ForEachSection(string_view file, Func&& section_func) { | 166 | 8.26k | string_view data = file; | 167 | 8.26k | ReadMagic(&data); | 168 | | | 169 | 436k | while (!data.empty()) { | 170 | 427k | Section section = Section::Read(&data); | 171 | 427k | section_func(section); | 172 | 427k | } | 173 | 8.26k | } |
webassembly.cc:void bloaty::wasm::ForEachSection<bloaty::wasm::ParseSymbols(bloaty::RangeSink*)::$_1>(std::__1::basic_string_view<char, std::__1::char_traits<char> >, bloaty::wasm::ParseSymbols(bloaty::RangeSink*)::$_1&&) Line | Count | Source | 165 | 935 | void ForEachSection(string_view file, Func&& section_func) { | 166 | 935 | string_view data = file; | 167 | 935 | ReadMagic(&data); | 168 | | | 169 | 52.7k | while (!data.empty()) { | 170 | 51.8k | Section section = Section::Read(&data); | 171 | 51.8k | section_func(section); | 172 | 51.8k | } | 173 | 935 | } |
webassembly.cc:void bloaty::wasm::ForEachSection<bloaty::wasm::ParseSymbols(bloaty::RangeSink*)::$_2>(std::__1::basic_string_view<char, std::__1::char_traits<char> >, bloaty::wasm::ParseSymbols(bloaty::RangeSink*)::$_2&&) Line | Count | Source | 165 | 935 | void ForEachSection(string_view file, Func&& section_func) { | 166 | 935 | string_view data = file; | 167 | 935 | ReadMagic(&data); | 168 | | | 169 | 21.9k | while (!data.empty()) { | 170 | 21.0k | Section section = Section::Read(&data); | 171 | 21.0k | section_func(section); | 172 | 21.0k | } | 173 | 935 | } |
webassembly.cc:void bloaty::wasm::ForEachSection<bloaty::wasm::AddWebAssemblyFallback(bloaty::RangeSink*)::$_3>(std::__1::basic_string_view<char, std::__1::char_traits<char> >, bloaty::wasm::AddWebAssemblyFallback(bloaty::RangeSink*)::$_3&&) Line | Count | Source | 165 | 7.63k | void ForEachSection(string_view file, Func&& section_func) { | 166 | 7.63k | string_view data = file; | 167 | 7.63k | ReadMagic(&data); | 168 | | | 169 | 432k | while (!data.empty()) { | 170 | 424k | Section section = Section::Read(&data); | 171 | 424k | section_func(section); | 172 | 424k | } | 173 | 7.63k | } |
|
174 | | |
175 | 8.26k | void ParseSections(RangeSink* sink) { |
176 | 427k | ForEachSection(sink->input_file().data(), [sink](const Section& section) { |
177 | 427k | sink->AddFileRange("wasm_sections", section.name, section.data); |
178 | 427k | }); |
179 | 8.26k | } |
180 | | |
// Maps an index (function or data segment) to the name recorded for it.
using IndexedNames = std::unordered_map<int, std::string>;
182 | | |
183 | | void ReadNames(const Section& section, IndexedNames* func_names, |
184 | 0 | IndexedNames* dataseg_names, RangeSink* sink) { |
185 | 0 | enum class NameType { |
186 | 0 | kModule = 0, |
187 | 0 | kFunction = 1, |
188 | 0 | kLocal = 2, |
189 | 0 | kLabel = 3, |
190 | 0 | kType = 4, |
191 | 0 | kTable = 5, |
192 | 0 | kMemory = 6, |
193 | 0 | kGlobal = 7, |
194 | 0 | kElemSegment = 8, |
195 | 0 | kDataSegment = 9 |
196 | 0 | }; |
197 | |
|
198 | 0 | string_view data = section.contents; |
199 | |
|
200 | 0 | while (!data.empty()) { |
201 | 0 | NameType type = static_cast<NameType>(ReadVarUInt7(&data)); |
202 | 0 | uint32_t size = ReadVarUInt32(&data); |
203 | 0 | string_view section = ReadPiece(size, &data); |
204 | |
|
205 | 0 | if (type == NameType::kFunction || type == NameType::kDataSegment) { |
206 | 0 | uint32_t count = ReadVarUInt32(§ion); |
207 | 0 | for (uint32_t i = 0; i < count; i++) { |
208 | 0 | string_view entry = section; |
209 | 0 | uint32_t index = ReadVarUInt32(§ion); |
210 | 0 | uint32_t name_len = ReadVarUInt32(§ion); |
211 | 0 | string_view name = ReadPiece(name_len, §ion); |
212 | 0 | entry = StrictSubstr(entry, 0, name.data() - entry.data() + name.size()); |
213 | 0 | sink->AddFileRange("wasm_funcname", name, entry); |
214 | 0 | IndexedNames *names = (type == NameType::kFunction ? func_names : dataseg_names); |
215 | 0 | (*names)[index] = std::string(name); |
216 | 0 | } |
217 | 0 | } |
218 | 0 | } |
219 | 0 | } |
220 | | |
221 | 106 | int ReadValueType(string_view* data) { |
222 | 106 | return ReadVarint7(data); |
223 | 106 | } |
224 | | |
225 | 262 | int ReadElemType(string_view* data) { |
226 | 262 | return ReadVarint7(data); |
227 | 262 | } |
228 | | |
229 | 589 | void ReadResizableLimits(string_view* data) { |
230 | 589 | auto flags = ReadVarUInt1(data); |
231 | 589 | ReadVarUInt32(data); |
232 | 589 | if (flags) { |
233 | 460 | ReadVarUInt32(data); |
234 | 460 | } |
235 | 589 | } |
236 | | |
237 | 106 | void ReadGlobalType(string_view* data) { |
238 | 106 | ReadValueType(data); |
239 | 106 | ReadVarUInt1(data); |
240 | 106 | } |
241 | | |
242 | 262 | void ReadTableType(string_view* data) { |
243 | 262 | ReadElemType(data); |
244 | 262 | ReadResizableLimits(data); |
245 | 262 | } |
246 | | |
247 | 337 | void ReadMemoryType(string_view* data) { |
248 | 337 | ReadResizableLimits(data); |
249 | 337 | } |
250 | | |
251 | 1.09k | uint32_t GetNumFunctionImports(const Section& section) { |
252 | 1.09k | assert(section.id == Section::kImport); |
253 | 0 | string_view data = section.contents; |
254 | | |
255 | 1.09k | uint32_t count = ReadVarUInt32(&data); |
256 | 1.09k | uint32_t func_count = 0; |
257 | | |
258 | 2.05k | for (uint32_t i = 0; i < count; i++) { |
259 | 1.39k | uint32_t module_len = ReadVarUInt32(&data); |
260 | 1.39k | ReadPiece(module_len, &data); |
261 | 1.39k | uint32_t field_len = ReadVarUInt32(&data); |
262 | 1.39k | ReadPiece(field_len, &data); |
263 | 1.39k | auto kind = ReadFixed<uint8_t>(&data); |
264 | | |
265 | 1.39k | switch (kind) { |
266 | 335 | case ExternalKind::kFunction: |
267 | 335 | func_count++; |
268 | 335 | ReadVarUInt32(&data); |
269 | 335 | break; |
270 | 262 | case ExternalKind::kTable: |
271 | 262 | ReadTableType(&data); |
272 | 262 | break; |
273 | 337 | case ExternalKind::kMemory: |
274 | 337 | ReadMemoryType(&data); |
275 | 337 | break; |
276 | 106 | case ExternalKind::kGlobal: |
277 | 106 | ReadGlobalType(&data); |
278 | 106 | break; |
279 | 45 | default: |
280 | 45 | THROWF("Unrecognized import kind: $0", kind); |
281 | 1.39k | } |
282 | 1.39k | } |
283 | | |
284 | 658 | return func_count; |
285 | 1.09k | } |
286 | | |
287 | | void ReadCodeSection(const Section& section, const IndexedNames& names, |
288 | 181 | uint32_t num_imports, RangeSink* sink) { |
289 | 181 | string_view data = section.contents; |
290 | | |
291 | 181 | uint32_t count = ReadVarUInt32(&data); |
292 | | |
293 | 720 | for (uint32_t i = 0; i < count; i++) { |
294 | 539 | string_view func = data; |
295 | 539 | uint32_t size = ReadVarUInt32(&data); |
296 | 539 | uint32_t total_size = size + (data.data() - func.data()); |
297 | | |
298 | 539 | func = StrictSubstr(func, 0, total_size); |
299 | 539 | data = StrictSubstr(data, size); |
300 | | |
301 | 539 | auto iter = names.find(num_imports + i); |
302 | | |
303 | 539 | if (iter == names.end()) { |
304 | 455 | std::string name = "func[" + std::to_string(i) + "]"; |
305 | 455 | sink->AddFileRange("wasm_function", name, func); |
306 | 455 | } else { |
307 | 84 | sink->AddFileRange("wasm_function", ItaniumDemangle(iter->second, sink->data_source()), func); |
308 | 84 | } |
309 | 539 | } |
310 | 181 | } |
311 | | |
312 | | void ReadDataSection(const Section& section, const IndexedNames& names, |
313 | 818 | RangeSink* sink) { |
314 | 818 | string_view data = section.contents; |
315 | 818 | uint32_t count = ReadVarUInt32(&data); |
316 | | |
317 | 2.03k | for (uint32_t i = 0; i < count; i++) { |
318 | 1.30k | string_view segment = data; |
319 | 1.30k | uint8_t mode = ReadFixed<uint8_t>(&data); |
320 | 1.30k | if (mode > 1) THROW("multi-memory extension isn't supported"); |
321 | 1.21k | if (mode == 0) { // Active segment |
322 | | // We will need to read the init expr. |
323 | | // For the extended const proposal, read instructions until end is reached |
324 | | // Otherwise, just read a constexpr inst (t.const or global.get) |
325 | | // For now, we just need to support passive segments. |
326 | 840 | continue; |
327 | 840 | } |
328 | | // else, a passive segment |
329 | | |
330 | 376 | uint32_t segment_size = ReadVarUInt32(&data); |
331 | 376 | uint32_t total_size = segment_size + (data.data() - segment.data()); |
332 | | |
333 | 376 | segment = StrictSubstr(segment, 0, total_size); |
334 | 376 | data = StrictSubstr(data, segment_size); |
335 | | |
336 | 376 | auto iter = names.find(i); |
337 | 376 | if (iter == names.end()) { |
338 | 312 | std::string name = "data[" + std::to_string(i) + "]"; |
339 | 312 | sink->AddFileRange("wasm_data", name, segment); |
340 | 312 | } else { |
341 | 64 | sink->AddFileRange("wasm_data", iter->second, segment); |
342 | 64 | } |
343 | 376 | } |
344 | 818 | } |
345 | | |
346 | | |
347 | 935 | void ParseSymbols(RangeSink* sink) { |
348 | | // First pass: read the custom naming section to get function names. |
349 | 935 | std::unordered_map<int, std::string> func_names; |
350 | 935 | std::unordered_map<int, std::string> dataseg_names; |
351 | 935 | uint32_t num_imports = 0; |
352 | | |
353 | 935 | ForEachSection(sink->input_file().data(), |
354 | 51.8k | [&func_names, &dataseg_names, sink](const Section& section) { |
355 | 51.8k | if (section.name == "name") { |
356 | 0 | ReadNames(section, &func_names, &dataseg_names, sink); |
357 | 0 | } |
358 | 51.8k | }); |
359 | | |
360 | | // Second pass: read the function/code sections. |
361 | 935 | ForEachSection(sink->input_file().data(), |
362 | 21.0k | [&func_names, &dataseg_names, &num_imports, sink](const Section& section) { |
363 | 21.0k | if (section.id == Section::kImport) { |
364 | 1.09k | num_imports = GetNumFunctionImports(section); |
365 | 19.9k | } else if (section.id == Section::kCode) { |
366 | 181 | ReadCodeSection(section, func_names, num_imports, sink); |
367 | 19.7k | } else if (section.id == Section::kData) { |
368 | 818 | ReadDataSection(section, dataseg_names, sink); |
369 | 818 | } |
370 | 21.0k | }); |
371 | 935 | } |
372 | | |
373 | 7.63k | void AddWebAssemblyFallback(RangeSink* sink) { |
374 | 424k | ForEachSection(sink->input_file().data(), [sink](const Section& section) { |
375 | 424k | std::string name2 = |
376 | 424k | std::string("[section ") + std::string(section.name) + std::string("]"); |
377 | 424k | sink->AddFileRange("wasm_overhead", name2, section.data); |
378 | 424k | }); |
379 | 7.63k | sink->AddFileRange("wasm_overhead", "[WASM Header]", |
380 | 7.63k | StrictSubstr(sink->input_file().data(), 0, 8)); |
381 | 7.63k | } |
382 | | |
383 | | class WebAssemblyObjectFile : public ObjectFile { |
384 | | public: |
385 | | WebAssemblyObjectFile(std::unique_ptr<InputFile> file_data) |
386 | 12.7k | : ObjectFile(std::move(file_data)) {} |
387 | | |
388 | 12.7k | std::string GetBuildId() const override { |
389 | | // TODO(haberman): does WebAssembly support this? |
390 | 12.7k | return std::string(); |
391 | 12.7k | } |
392 | | |
393 | 6.39k | void ProcessFile(const std::vector<RangeSink*>& sinks) const override { |
394 | 12.0k | for (auto sink : sinks) { |
395 | 12.0k | switch (sink->data_source()) { |
396 | 7.32k | case DataSource::kSegments: |
397 | 8.26k | case DataSource::kSections: |
398 | 8.26k | ParseSections(sink); |
399 | 8.26k | break; |
400 | 0 | case DataSource::kSymbols: |
401 | 0 | case DataSource::kRawSymbols: |
402 | 935 | case DataSource::kShortSymbols: |
403 | 935 | case DataSource::kFullSymbols: |
404 | 935 | ParseSymbols(sink); |
405 | 935 | break; |
406 | 935 | case DataSource::kArchiveMembers: |
407 | 1.87k | case DataSource::kCompileUnits: |
408 | 2.80k | case DataSource::kInlines: |
409 | 2.80k | default: |
410 | 2.80k | THROW("WebAssembly doesn't support this data source"); |
411 | 12.0k | } |
412 | 7.63k | AddWebAssemblyFallback(sink); |
413 | 7.63k | } |
414 | 6.39k | } |
415 | | |
416 | | bool GetDisassemblyInfo(absl::string_view /*symbol*/, |
417 | | DataSource /*symbol_source*/, |
418 | 0 | DisassemblyInfo* /*info*/) const override { |
419 | 0 | WARN("WebAssembly files do not support disassembly yet"); |
420 | 0 | return false; |
421 | 0 | } |
422 | | }; |
423 | | |
424 | | } // namespace wasm |
425 | | |
426 | | std::unique_ptr<ObjectFile> TryOpenWebAssemblyFile( |
427 | 19.5k | std::unique_ptr<InputFile>& file) { |
428 | 19.5k | string_view data = file->data(); |
429 | 19.5k | if (wasm::ReadMagic(&data)) { |
430 | 12.7k | return std::unique_ptr<ObjectFile>( |
431 | 12.7k | new wasm::WebAssemblyObjectFile(std::move(file))); |
432 | 12.7k | } |
433 | | |
434 | 6.75k | return nullptr; |
435 | 19.5k | } |
436 | | |
437 | | } // namespace bloaty |