Word Unperfect
public
Read
Owner: themaster
Branch: main
Commits: 0
Git CLI clone URL
git clone https://www.xt-emporium.com/git/word-unperfect.git
Fullscreen desktop URL
Code
Commits
History
Branches
Bug Reports
Discussions
Compare
Settings
word-unperfect
/
rev
/
wp_corpus_validator.c
File editor
/*
 * Corpus-level drivers: walk a file or directory tree and, for every file
 * whose header is accepted by wp_file_read_header(), either validate the
 * document stream, take an inventory of the resource file, or (host builds
 * only) check that saving the loaded file reproduces the original bytes.
 */
#include "wp_corpus_validator.h"
#include "wp_document_analyzer.h"
#include "wp_file_format.h"
#include "wp_fixed_codes.h"
#include "wp_layout_shared.h"
#include "wp_record_parser.h"

#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

enum {
    /* Half-open range [FIRST, END) of the codes this file reports as "fixed". */
    WP_CORPUS_FIXED_CODE_FIRST = 0xC0,
    WP_CORPUS_FIXED_CODE_END = 0xD0,
    /* Number of slots in every per-code table indexed by a byte value. */
    WP_CORPUS_CODE_SLOTS = 256
};

/* Fill *options with the defaults: document streams only, per-file output on. */
void wp_corpus_default_options(WpCorpusValidationOptions *options) {
    if (options == 0) {
        return;
    }
    options->document_streams_only = true;
    options->print_per_file = true;
}

/* Zero every counter in *stats; a null pointer is ignored. */
void wp_corpus_stats_clear(WpCorpusValidationStats *stats) {
    if (stats != 0) {
        memset(stats, 0, sizeof(*stats));
    }
}

/* Zero every counter in *stats; a null pointer is ignored. */
void wp_corpus_round_trip_stats_clear(WpCorpusRoundTripStats *stats) {
    if (stats != 0) {
        memset(stats, 0, sizeof(*stats));
    }
}

/* Zero every counter in *stats; a null pointer is ignored. */
void wp_corpus_inventory_stats_clear(WpCorpusInventoryStats *stats) {
    if (stats != 0) {
        memset(stats, 0, sizeof(*stats));
    }
}

/*
 * Concatenate two path components, inserting a single '/' unless `left` is
 * empty or already ends with one.
 *
 * Returns a malloc'd string the caller must free(), or 0 when either input
 * is null or the allocation fails.
 */
static char *wp_corpus_join_path(const char *left, const char *right) {
    size_t left_len;
    size_t right_len;
    size_t need_sep;
    char *out;

    if (left == 0 || right == 0) {
        return 0;
    }

    left_len = strlen(left);
    right_len = strlen(right);
    need_sep = (left_len > 0U && left[left_len - 1U] != '/') ? 1U : 0U;

    out = (char *)malloc(left_len + need_sep + right_len + 1U);
    if (out == 0) {
        return 0;
    }

    memcpy(out, left, left_len);
    if (need_sep != 0U) {
        out[left_len] = '/';
    }
    memcpy(out + left_len + need_sep, right, right_len);
    out[left_len + need_sep + right_len] = '\0';
    return out;
}

/*
 * Sum the histogram counts of every fixed-range code that
 * wp_fixed_code_is_known() does not recognise. Returns 0 for null stats.
 */
static size_t wp_corpus_unknown_fixed_packets(const WpDocumentStats *stats) {
    size_t total = 0U;
    uint16_t code;

    if (stats == 0) {
        return 0U;
    }

    for (code = WP_CORPUS_FIXED_CODE_FIRST; code < WP_CORPUS_FIXED_CODE_END; ++code) {
        if (stats->code_histogram[code] != 0U && !wp_fixed_code_is_known((uint8_t)code)) {
            total += stats->code_histogram[code];
        }
    }
    return total;
}

/* Print one line per unrecognised fixed-range code that occurred at least once. */
static void wp_corpus_dump_unknown_fixed_codes(const WpDocumentStats *stats, FILE *report) {
    uint16_t code;
    size_t count;

    if (stats == 0 || report == 0) {
        return;
    }

    for (code = WP_CORPUS_FIXED_CODE_FIRST; code < WP_CORPUS_FIXED_CODE_END; ++code) {
        if (wp_fixed_code_is_known((uint8_t)code)) {
            continue;
        }
        count = stats->code_histogram[code];
        if (count == 0U) {
            continue;
        }
        fprintf(report, " unknown-fixed:0x%02X count=%lu\n", (unsigned)code, (unsigned long)count);
    }
}

/*
 * Print the non-zero histogram entries for the fixed code range, or a single
 * "fixed-none" line when every entry is zero.
 */
static void wp_corpus_dump_fixed_histogram(const WpDocumentStats *stats, FILE *report) {
    uint16_t code;
    bool printed = false;

    if (stats == 0 || report == 0) {
        return;
    }

    for (code = WP_CORPUS_FIXED_CODE_FIRST; code < WP_CORPUS_FIXED_CODE_END; ++code) {
        size_t count = stats->code_histogram[code];
        if (count == 0U) {
            continue;
        }
        fprintf(report, " fixed-0x%02X=%lu\n", (unsigned)code, (unsigned long)count);
        printed = true;
    }

    if (!printed) {
        fprintf(report, " fixed-none=0\n");
    }
}

/*
 * Re-parse the primary stream of `file` and count, per record code, the
 * fixed-length records whose trailer is present but does not match
 * (fixed_mismatch) and those that are incomplete with no trailer
 * (fixed_incomplete).
 *
 * Both outputs must point at WP_CORPUS_CODE_SLOTS elements. Whenever both
 * are non-null they are zeroed before anything else, so callers always read
 * defined values even if the file is null, empty, or cannot be bound.
 */
static void wp_corpus_collect_fixed_mismatch_counts(const WpLoadedFile *file, size_t *fixed_mismatch, size_t *fixed_incomplete) {
    WpLayoutGlobals wl;

    if (fixed_mismatch == 0 || fixed_incomplete == 0) {
        return;
    }
    /* Zero first: the caller passes stack arrays and reads them unconditionally. */
    memset(fixed_mismatch, 0, sizeof(size_t) * (size_t)WP_CORPUS_CODE_SLOTS);
    memset(fixed_incomplete, 0, sizeof(size_t) * (size_t)WP_CORPUS_CODE_SLOTS);

    if (file == 0 || file->logical_size == 0U || file->logical_bytes == 0) {
        return;
    }

    memset(&wl, 0, sizeof(wl));
    /* NOTE(review): the meaning of the 4096 argument is defined by wp_file_bind_primary_stream. */
    if (!wp_file_bind_primary_stream((WpLoadedFile *)file, &wl, 4096U)) {
        return;
    }

    while (wl.record_used_bytes > 0) {
        WpRecord rec;

        wp_parser_consume_record(&wl, &rec);
        if (rec.length == 0U) {
            /* A zero-length record cannot advance the stream; stop here. */
            wp_record_free(&rec);
            break;
        }
        if (rec.type == WP_CODE_FIXED_LENGTH) {
            if (rec.trailer_present && !rec.trailer_matches) {
                fixed_mismatch[(size_t)rec.code]++;
            }
            if (!rec.is_complete && !rec.trailer_present) {
                fixed_incomplete[(size_t)rec.code]++;
            }
        }
        wp_record_free(&rec);
    }
}

/*
 * Parse the primary stream record by record and add up the record lengths.
 *
 * Returns false when `file` is null or the stream cannot be bound; otherwise
 * returns true and, if bytes_consumed is non-null, stores the total there.
 * *bytes_consumed is left untouched on failure.
 */
static bool wp_corpus_validate_primary_bytes(const WpLoadedFile *file, size_t *bytes_consumed) {
    WpLayoutGlobals wl;
    size_t consumed = 0U;

    if (file == 0) {
        return false;
    }

    memset(&wl, 0, sizeof(wl));
    if (!wp_file_bind_primary_stream((WpLoadedFile *)file, &wl, 4096U)) {
        return false;
    }

    while (wl.record_used_bytes > 0) {
        WpRecord rec;

        wp_parser_consume_record(&wl, &rec);
        if (rec.length == 0U) {
            wp_record_free(&rec);
            break;
        }
        consumed += (size_t)rec.length;
        wp_record_free(&rec);
    }

    if (bytes_consumed != 0) {
        *bytes_consumed = consumed;
    }
    return true;
}

/*
 * True when every "residual" counter (unknown, generic or unparsed content)
 * in *stats is zero. Null stats count as having residuals.
 */
static bool wp_corpus_document_stats_have_no_parser_residuals(const WpDocumentStats *stats) {
    if (stats == 0) {
        return false;
    }
    return stats->unknown_single_byte_records == 0U
        && stats->d4_unknown_records == 0U
        && stats->d4_trailing_unparsed_bytes == 0U
        && stats->variable_generic_packets == 0U
        && stats->nested_stream_parse_gaps == 0U
        && stats->nested_stream_recursion_limits == 0U;
}

/* Print the residual counters of *stats on a single report line. */
static void wp_corpus_dump_parser_residuals(const WpDocumentStats *stats, FILE *report) {
    if (stats == 0 || report == 0) {
        return;
    }
    fprintf(report,
            " parser-residuals: unknown-single=%lu d4-unknown=%lu d4-unknown-bytes=%lu d4-trailing=%lu variable-generic=%lu nested-gaps=%lu nested-recursion-limits=%lu\n",
            (unsigned long)stats->unknown_single_byte_records,
            (unsigned long)stats->d4_unknown_records,
            (unsigned long)stats->d4_unknown_payload_bytes,
            (unsigned long)stats->d4_trailing_unparsed_bytes,
            (unsigned long)stats->variable_generic_packets,
            (unsigned long)stats->nested_stream_parse_gaps,
            (unsigned long)stats->nested_stream_recursion_limits);
}

/*
 * Print, for each fixed-range code with at least one incomplete or
 * trailer-mismatched record, both counts; print a "none" line otherwise.
 */
static void wp_corpus_dump_fixed_trailer_stats(const WpLoadedFile *file, FILE *report) {
    size_t fixed_mismatch[WP_CORPUS_CODE_SLOTS];
    size_t fixed_incomplete[WP_CORPUS_CODE_SLOTS];
    uint16_t code;
    bool printed = false;

    if (report == 0) {
        return;
    }

    /* The collector zeroes both arrays before any early return. */
    wp_corpus_collect_fixed_mismatch_counts(file, fixed_mismatch, fixed_incomplete);

    for (code = WP_CORPUS_FIXED_CODE_FIRST; code < WP_CORPUS_FIXED_CODE_END; ++code) {
        if (fixed_mismatch[code] != 0U || fixed_incomplete[code] != 0U) {
            fprintf(report, " fixed-stat-0x%02X incomplete=%lu mismatch=%lu\n",
                    (unsigned)code,
                    (unsigned long)fixed_incomplete[code],
                    (unsigned long)fixed_mismatch[code]);
            printed = true;
        }
    }

    if (!printed) {
        fprintf(report, " fixed-mismatch-stats-none\n");
    }
}

/* Add the same-named counter of *stats onto *total (local to the function below). */
#define WP_CORPUS_SUM(field) (total->field += stats->field)

/*
 * Fold one document's statistics into the corpus totals.
 *
 * Most counters are summed; d0_format_table_first_step keeps the first
 * non-zero value seen, and d0_format_table_max_value and
 * nested_stream_max_depth keep the maximum. `unknown_fixed` is the value
 * computed by wp_corpus_unknown_fixed_packets() for this document.
 */
static void wp_corpus_accumulate(WpCorpusValidationStats *total, const WpLoadedFile *file, const WpDocumentStats *stats, size_t unknown_fixed) {
    size_t i;

    if (total == 0 || file == 0 || stats == 0) {
        return;
    }

    total->document_files_seen++;
    total->body_bytes += file->logical_size;

    WP_CORPUS_SUM(records_seen);
    WP_CORPUS_SUM(top_level_records_seen);
    WP_CORPUS_SUM(top_level_bytes_consumed);
    WP_CORPUS_SUM(char_records);
    WP_CORPUS_SUM(single_byte_codes);
    WP_CORPUS_SUM(fixed_length_codes);
    WP_CORPUS_SUM(variable_length_codes);
    WP_CORPUS_SUM(payload_bytes);
    WP_CORPUS_SUM(ascii_control_records);
    WP_CORPUS_SUM(printable_text_records);
    WP_CORPUS_SUM(single_byte_format_records);
    WP_CORPUS_SUM(packet_records);
    WP_CORPUS_SUM(line_break_records);
    WP_CORPUS_SUM(page_break_records);
    WP_CORPUS_SUM(hard_returns);
    WP_CORPUS_SUM(soft_returns);
    WP_CORPUS_SUM(hard_pages);
    WP_CORPUS_SUM(soft_pages);
    WP_CORPUS_SUM(tabs);
    WP_CORPUS_SUM(indents);
    WP_CORPUS_SUM(dormant_returns);
    WP_CORPUS_SUM(dormant_pages);
    WP_CORPUS_SUM(normalized_spaces);
    WP_CORPUS_SUM(normalized_hyphens);
    WP_CORPUS_SUM(whitespace_gate_records);
    WP_CORPUS_SUM(render_dirty_gate_records);
    WP_CORPUS_SUM(dirty_exempt_records);
    WP_CORPUS_SUM(dirty_neutral_records);
    WP_CORPUS_SUM(extension_scan_stop_records);
    WP_CORPUS_SUM(tsm_highlight_records);
    WP_CORPUS_SUM(hyphenation_suppression_records);
    WP_CORPUS_SUM(hidden_function_gate_records);
    WP_CORPUS_SUM(unknown_single_byte_records);
    WP_CORPUS_SUM(display_columns);
    WP_CORPUS_SUM(fixed_known_packets);
    WP_CORPUS_SUM(fixed_unknown_packets);
    WP_CORPUS_SUM(fixed_mirrored_packets);
    WP_CORPUS_SUM(fixed_zero_payload_packets);
    WP_CORPUS_SUM(extended_character_packets);
    WP_CORPUS_SUM(attribute_begin_packets);
    WP_CORPUS_SUM(attribute_end_packets);
    WP_CORPUS_SUM(d4_layout_state_records);
    WP_CORPUS_SUM(d4_layout_state_payload_bytes);
    WP_CORPUS_SUM(d4_pending_span_records);
    WP_CORPUS_SUM(d4_line_window_records);
    WP_CORPUS_SUM(d4_line_window_extension_bytes);
    WP_CORPUS_SUM(d4_line_metric_records);
    WP_CORPUS_SUM(d4_line_metric_extension_bytes);
    WP_CORPUS_SUM(d4_extension_fragment_records);
    WP_CORPUS_SUM(d4_extension_fragment_bytes);
    WP_CORPUS_SUM(d4_line_build_checkpoint_records);
    WP_CORPUS_SUM(d4_word_pair_checkpoint_records);
    WP_CORPUS_SUM(d4_position_marker_records);
    WP_CORPUS_SUM(d4_control_word_records);
    WP_CORPUS_SUM(d4_compact_metric_records);
    WP_CORPUS_SUM(d4_layout_anchor_records);
    WP_CORPUS_SUM(d4_unknown_records);
    WP_CORPUS_SUM(d4_unknown_payload_bytes);
    WP_CORPUS_SUM(d4_trailing_unparsed_bytes);
    WP_CORPUS_SUM(d4_extension_block_records);
    WP_CORPUS_SUM(d4_extension_block_bytes);
    WP_CORPUS_SUM(variable_structural_packets);
    WP_CORPUS_SUM(variable_generic_packets);
    WP_CORPUS_SUM(d1_definition_packets);
    WP_CORPUS_SUM(d2_outline_packets);
    WP_CORPUS_SUM(d3_generated_text_packets);
    WP_CORPUS_SUM(repeat_group_packets);
    WP_CORPUS_SUM(delayed_text_packets);
    WP_CORPUS_SUM(box_object_packets);
    WP_CORPUS_SUM(table_layout_packets);
    WP_CORPUS_SUM(system_command_packets);
    WP_CORPUS_SUM(d0_initial_format_tables);
    WP_CORPUS_SUM(d0_format_table_words);
    WP_CORPUS_SUM(d0_format_table_groups);
    WP_CORPUS_SUM(d0_format_table_sentinels);
    WP_CORPUS_SUM(d0_format_table_nonmonotonic_steps);

    /* Keep the first non-zero step ever reported. */
    if (stats->d0_format_table_first_step != 0U && total->d0_format_table_first_step == 0U) {
        total->d0_format_table_first_step = stats->d0_format_table_first_step;
    }
    if (stats->d0_format_table_max_value > total->d0_format_table_max_value) {
        total->d0_format_table_max_value = stats->d0_format_table_max_value;
    }

    WP_CORPUS_SUM(incomplete_records);
    WP_CORPUS_SUM(mismatched_trailers);
    total->unknown_fixed_packets += unknown_fixed;
    WP_CORPUS_SUM(nested_stream_hints);
    WP_CORPUS_SUM(nested_streams_analyzed);
    WP_CORPUS_SUM(nested_stream_records);
    WP_CORPUS_SUM(nested_stream_bytes);
    if (stats->nested_stream_max_depth > total->nested_stream_max_depth) {
        total->nested_stream_max_depth = stats->nested_stream_max_depth;
    }
    WP_CORPUS_SUM(nested_stream_parse_gaps);
    WP_CORPUS_SUM(nested_stream_recursion_limits);
    WP_CORPUS_SUM(variable_repeat_dispatch_packets);
    WP_CORPUS_SUM(variable_repeat_total);
    WP_CORPUS_SUM(variable_extension_scan_packets);
    WP_CORPUS_SUM(variable_refcount_increment_packets);
    WP_CORPUS_SUM(variable_refcount_decrement_packets);
    WP_CORPUS_SUM(scanner_bypass_packets);

    for (i = 0U; i < (size_t)WP_CORPUS_CODE_SLOTS; ++i) {
        total->code_histogram[i] += stats->code_histogram[i];
    }
    for (i = 0U; i < (size_t)WP_VARIABLE_ACTION_COUNT; ++i) {
        total->variable_action_histogram[i] += stats->variable_action_histogram[i];
    }
}

#undef WP_CORPUS_SUM

/* Count the set bits in one byte. */
static size_t wp_corpus_count_bits(uint8_t value) {
    size_t count = 0U;

    while (value != 0U) {
        count += (size_t)(value & 1U);
        value = (uint8_t)(value >> 1U);
    }
    return count;
}

/*
 * OR the WP_RES_BYTE_BITMAP_BYTES-byte bitmap `src` into `dest` and return
 * how many bits were newly set in `dest`. Returns 0 if either is null.
 */
static size_t wp_corpus_merge_byte_bitmap(uint8_t *dest, const uint8_t *src) {
    size_t added = 0U;
    size_t i;

    if (dest == 0 || src == 0) {
        return 0U;
    }

    for (i = 0U; i < WP_RES_BYTE_BITMAP_BYTES; ++i) {
        uint8_t new_bits = (uint8_t)(src[i] & (uint8_t)~dest[i]);
        if (new_bits != 0U) {
            added += wp_corpus_count_bits(new_bits);
            dest[i] |= new_bits;
        }
    }
    return added;
}

/*
 * Validate a single regular file.
 *
 * Files whose header cannot be read are ignored (returns true), as are
 * files skipped by options->document_streams_only (file type other than
 * 0x0A). A file passes when the primary stream binds, the summed record
 * lengths equal the logical size, and there are no incomplete records,
 * mismatched trailers, unknown fixed codes or parser residuals.
 *
 * Counters in *stats (may be null) are updated, and a per-file line plus
 * diagnostics is written when report/options request it.
 */
static bool wp_corpus_validate_file(const char *path, FILE *report, const WpCorpusValidationOptions *options, WpCorpusValidationStats *stats) {
    WpFileHeader header;
    WpLoadedFile file;
    WpDocumentStats doc_stats;
    size_t unknown_fixed;
    size_t primary_consumed;
    bool primary_bytes_ok;
    bool parser_residuals_ok;
    bool file_ok;

    if (stats != 0) {
        stats->paths_seen++;
    }

    if (!wp_file_read_header(path, &header)) {
        return true; /* Non-WP files are ignored by corpus validation. */
    }
    if (stats != 0) {
        stats->wp_files_seen++;
    }

    if (options != 0 && options->document_streams_only && header.file_type != 0x0AU) {
        if (stats != 0) {
            stats->skipped_wp_files++;
        }
        return true;
    }

    if (!wp_file_load_body(path, &file)) {
        if (stats != 0) {
            stats->load_failures++;
            stats->files_failed++;
        }
        if (report != 0 && options != 0 && options->print_per_file) {
            fprintf(report, "BAD %s load-failed\n", path);
        }
        return false;
    }

    if (!wp_document_analyze_loaded_file(&file, &doc_stats)) {
        wp_file_free(&file);
        if (stats != 0) {
            stats->files_failed++;
        }
        if (report != 0 && options != 0 && options->print_per_file) {
            fprintf(report, "BAD %s analyze-failed\n", path);
        }
        return false;
    }

    unknown_fixed = wp_corpus_unknown_fixed_packets(&doc_stats);
    primary_consumed = 0U;
    primary_bytes_ok = wp_corpus_validate_primary_bytes(&file, &primary_consumed);
    parser_residuals_ok = wp_corpus_document_stats_have_no_parser_residuals(&doc_stats);

    file_ok = primary_bytes_ok
        && (primary_consumed == file.logical_size)
        && doc_stats.incomplete_records == 0U
        && doc_stats.mismatched_trailers == 0U
        && unknown_fixed == 0U
        && parser_residuals_ok;

    if (stats != 0) {
        wp_corpus_accumulate(stats, &file, &doc_stats, unknown_fixed);
        if (file_ok) {
            stats->files_passed++;
        } else {
            stats->files_failed++;
        }
    }

    if (report != 0 && options != 0 && options->print_per_file) {
        fprintf(report,
                "%s %s type=0x%02X body=%lu records=%lu expanded=%lu fixed=%lu var=%lu incomplete=%lu bad-trailer=%lu unknown-fixed=%lu generic=%lu residual=%lu\n",
                file_ok ? "OK " : "BAD",
                path,
                (unsigned)file.header.file_type,
                (unsigned long)file.logical_size,
                (unsigned long)doc_stats.top_level_records_seen,
                (unsigned long)doc_stats.records_seen,
                (unsigned long)doc_stats.fixed_length_codes,
                (unsigned long)doc_stats.variable_length_codes,
                (unsigned long)doc_stats.incomplete_records,
                (unsigned long)doc_stats.mismatched_trailers,
                (unsigned long)unknown_fixed,
                (unsigned long)doc_stats.variable_generic_packets,
                parser_residuals_ok ? 0UL : 1UL);

        /* Every other check passed, so the failure is the byte accounting. */
        if (!file_ok
            && doc_stats.incomplete_records == 0U
            && doc_stats.mismatched_trailers == 0U
            && unknown_fixed == 0U
            && parser_residuals_ok) {
            fprintf(report, " bytes-mismatch: body=%lu analyzed=%lu\n",
                    (unsigned long)file.logical_size,
                    (unsigned long)primary_consumed);
        }

        if (!file_ok
            && (doc_stats.incomplete_records != 0U
                || doc_stats.mismatched_trailers != 0U
                || unknown_fixed != 0U)) {
            wp_corpus_dump_fixed_histogram(&doc_stats, report);
            if (unknown_fixed != 0U) {
                wp_corpus_dump_unknown_fixed_codes(&doc_stats, report);
            }
            wp_corpus_dump_fixed_trailer_stats(&file, report);
        }

        if (!parser_residuals_ok) {
            wp_corpus_dump_parser_residuals(&doc_stats, report);
        }
    }

    wp_file_free(&file);
    return file_ok;
}

/*
 * Validate `path`, descending into directories recursively.
 *
 * Returns false if the path cannot be stat'ed or opened, a child path cannot
 * be built, or any file fails validation. Entries that are neither regular
 * files nor directories are only counted in paths_seen.
 */
static bool wp_corpus_validate_path_inner(const char *path, FILE *report, const WpCorpusValidationOptions *options, WpCorpusValidationStats *stats) {
    struct stat st;
    DIR *dir;
    struct dirent *entry;
    bool ok = true;

    if (path == 0) {
        return false;
    }
    if (stat(path, &st) != 0) {
        return false;
    }

    if (S_ISDIR(st.st_mode)) {
        dir = opendir(path);
        if (dir == 0) {
            return false;
        }
        while ((entry = readdir(dir)) != 0) {
            char *child;

            if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) {
                continue;
            }
            child = wp_corpus_join_path(path, entry->d_name);
            if (child == 0) {
                ok = false;
                continue;
            }
            if (!wp_corpus_validate_path_inner(child, report, options, stats)) {
                ok = false;
            }
            free(child);
        }
        closedir(dir);
        return ok;
    }

    if (S_ISREG(st.st_mode)) {
        return wp_corpus_validate_file(path, report, options, stats);
    }

    if (stats != 0) {
        stats->paths_seen++;
    }
    return true;
}

/*
 * Public entry point: validate a file or directory tree.
 *
 * `options` may be null (defaults are used) and `stats` may be null. When
 * `stats` is non-null it is overwritten with the totals for this run.
 * Returns false for a null path or when any failure was recorded.
 */
bool wp_corpus_validate_path(const char *path, FILE *report, const WpCorpusValidationOptions *options, WpCorpusValidationStats *stats) {
    WpCorpusValidationOptions local_options;
    WpCorpusValidationStats local_stats;
    bool ok;

    if (path == 0) {
        return false;
    }
    if (options == 0) {
        wp_corpus_default_options(&local_options);
        options = &local_options;
    }

    wp_corpus_stats_clear(&local_stats);
    ok = wp_corpus_validate_path_inner(path, report, options, &local_stats);
    if (local_stats.files_failed != 0U || local_stats.load_failures != 0U) {
        ok = false;
    }

    if (stats != 0) {
        *stats = local_stats;
    }
    return ok;
}

/*
 * Fold one successfully analysed resource file into the inventory totals.
 * The macro, keyboard, printer and generic sections are added only when the
 * matching has_*_summary flag is set; the generic section is additionally
 * skipped for the WP_RES_FAMILY_DOCUMENT_STYLE family.
 */
static void wp_corpus_inventory_accumulate(WpCorpusInventoryStats *stats, const WpResourceFileAnalysis *analysis) {
    const WpResourceFileInfo *info;

    if (stats == 0 || analysis == 0) {
        return;
    }
    info = &analysis->info;

    stats->wp_files_seen++;
    stats->files_passed++;
    stats->bytes_seen += info->file_size;
    stats->prefix_bytes += info->prefix_size;
    stats->body_bytes += info->body_size;
    stats->file_type_histogram[info->header.file_type]++;
    if (info->family < WP_RES_FAMILY_COUNT) {
        stats->family_histogram[info->family]++;
    }
    if (info->has_body) {
        stats->files_with_body++;
    } else {
        stats->prefix_only_files++;
    }

    if (analysis->has_macro_summary) {
        const WpMacroStreamSummary *macro = &analysis->macro;
        unsigned opcode;

        stats->macro_files++;
        stats->macro_words += macro->words;
        stats->macro_literal_words += macro->literal_words;
        stats->macro_control_words += macro->control_words;
        stats->macro_command_words += macro->command_words;
        stats->macro_extended_words += macro->extended_words;
        stats->macro_zero_words += macro->zero_words;
        stats->macro_title_bytes += macro->title_bytes;
        stats->macro_preview_chars += macro->literal_preview_length;
        if (macro->odd_body_size) {
            stats->macro_odd_body_files++;
        }
        if (macro->has_title) {
            stats->macro_titled_files++;
        }
        for (opcode = 0U; opcode < WP_RES_MACRO_COMMAND_OPCODE_COUNT; ++opcode) {
            if (macro->command_histogram[opcode] == 0U) {
                continue;
            }
            /* First sighting of this opcode anywhere in the corpus. */
            if (stats->macro_command_histogram[opcode] == 0U) {
                stats->macro_unique_command_opcodes++;
            }
            stats->macro_command_histogram[opcode] += macro->command_histogram[opcode];
        }
    }

    if (analysis->has_keyboard_summary) {
        const WpKeyboardLayoutSummary *keyboard = &analysis->keyboard;

        stats->keyboard_files++;
        stats->keyboard_slots += keyboard->slots;
        stats->keyboard_empty_slots += keyboard->empty_slots;
        stats->keyboard_bound_slots += keyboard->bound_slots;
        stats->keyboard_literal_refs += keyboard->literal_refs;
        stats->keyboard_macro_refs += keyboard->macro_refs;
        stats->keyboard_command_refs += keyboard->command_refs;
        stats->keyboard_macro_command_refs += keyboard->macro_command_refs;
        stats->keyboard_other_refs += keyboard->other_refs;
        stats->keyboard_trailing_bytes += keyboard->trailing_bytes;
        /* Unique counts grow only by bits not yet present in the corpus bitmap. */
        stats->keyboard_unique_literal_refs +=
            wp_corpus_merge_byte_bitmap(stats->keyboard_literal_ref_bitmap, keyboard->literal_ref_bitmap);
        stats->keyboard_unique_macro_refs +=
            wp_corpus_merge_byte_bitmap(stats->keyboard_macro_ref_bitmap, keyboard->macro_ref_bitmap);
        stats->keyboard_unique_command_refs +=
            wp_corpus_merge_byte_bitmap(stats->keyboard_command_ref_bitmap, keyboard->command_ref_bitmap);
        stats->keyboard_unique_macro_command_refs +=
            wp_corpus_merge_byte_bitmap(stats->keyboard_macro_command_ref_bitmap, keyboard->macro_command_ref_bitmap);
        stats->keyboard_unique_entry_classes +=
            wp_corpus_merge_byte_bitmap(stats->keyboard_entry_class_bitmap, keyboard->entry_class_bitmap);
        stats->keyboard_descriptor_records += keyboard->descriptor_records;
        stats->keyboard_descriptor_bytes += keyboard->descriptor_bytes;
        stats->keyboard_section_entries += keyboard->section_entries;
        stats->keyboard_valid_sections += keyboard->valid_sections;
        stats->keyboard_invalid_sections += keyboard->invalid_sections;
        stats->keyboard_section_payload_bytes += keyboard->section_payload_bytes;
        if (keyboard->has_descriptor) {
            stats->keyboard_descriptor_files++;
        }
        if (keyboard->has_binding_section) {
            stats->keyboard_binding_section_files++;
        }
        if (keyboard->has_descriptor_section) {
            stats->keyboard_descriptor_section_files++;
        }
    }

    if (analysis->has_printer_summary) {
        const WpPrinterResourceSummary *printer = &analysis->printer;

        stats->printer_files++;
        stats->printer_body_words += printer->body_words;
        stats->printer_zero_words += printer->zero_words;
        stats->printer_ffff_words += printer->ffff_words;
        stats->printer_offset_like_words += printer->offset_like_words;
        if (printer->odd_body_size) {
            stats->printer_odd_body_files++;
        }
        if (printer->has_name) {
            stats->printer_named_files++;
        }
    }

    if (analysis->has_generic_summary && info->family != WP_RES_FAMILY_DOCUMENT_STYLE) {
        const WpGenericResourceSummary *generic = &analysis->generic;

        stats->resource_generic_files++;
        stats->resource_generic_prefix_bytes += generic->prefix_bytes;
        stats->resource_generic_body_bytes += generic->body_bytes;
        stats->resource_generic_body_words += generic->body_words;
        stats->resource_generic_zero_words += generic->zero_words;
        stats->resource_generic_ffff_words += generic->ffff_words;
        stats->resource_generic_offset_like_words += generic->offset_like_words;
        stats->resource_generic_printable_runs += generic->printable_runs;
        stats->resource_generic_printable_bytes += generic->printable_bytes;
        stats->resource_generic_length_strings += generic->length_prefixed_strings;
        stats->resource_generic_length_string_bytes += generic->length_prefixed_string_bytes;
        if (generic->has_string) {
            stats->resource_generic_string_files++;
        }
        if (generic->odd_body_size) {
            stats->resource_generic_odd_body_files++;
        }
    }
}

/*
 * Inventory a single regular file.
 *
 * Files whose header cannot be read are ignored (returns true). If
 * wp_res_analyze_file() fails the file is counted as a load failure and
 * false is returned; otherwise the analysis is accumulated, a "WPOK" line
 * and any available summaries are written to `report`, and true is returned.
 */
static bool wp_corpus_inventory_file(const char *path, FILE *report, WpCorpusInventoryStats *stats) {
    WpFileHeader header;
    WpResourceFileAnalysis analysis;

    if (stats != 0) {
        stats->paths_seen++;
    }

    if (!wp_file_read_header(path, &header)) {
        return true;
    }

    if (!wp_res_analyze_file(path, &analysis)) {
        if (stats != 0) {
            stats->wp_files_seen++;
            stats->files_failed++;
            stats->load_failures++;
        }
        if (report != 0) {
            fprintf(report, "WPBAD %s load-failed\n", path);
        }
        return false;
    }

    wp_corpus_inventory_accumulate(stats, &analysis);

    if (report != 0) {
        fprintf(report,
                "WPOK %s type=0x%02X kind=%s version=%u.%u prefix=%lu body=%lu bytes=%lu\n",
                path,
                (unsigned)analysis.info.header.file_type,
                wp_res_family_name(analysis.info.family),
                (unsigned)analysis.info.header.major_version,
                (unsigned)analysis.info.header.minor_version,
                (unsigned long)analysis.info.prefix_size,
                (unsigned long)analysis.info.body_size,
                (unsigned long)analysis.info.file_size);

        if (analysis.has_macro_summary) {
            fprintf(report,
                    " macro-words=%lu literal=%lu control=%lu command=%lu unique-cmd=%lu top-cmd=0x%02X:%lu extended=%lu zero=%lu title=\"%s\" preview=\"%s\" odd-body=%u\n",
                    (unsigned long)analysis.macro.words,
                    (unsigned long)analysis.macro.literal_words,
                    (unsigned long)analysis.macro.control_words,
                    (unsigned long)analysis.macro.command_words,
                    (unsigned long)analysis.macro.unique_command_opcodes,
                    (unsigned)analysis.macro.most_common_command_opcode,
                    (unsigned long)analysis.macro.most_common_command_count,
                    (unsigned long)analysis.macro.extended_words,
                    (unsigned long)analysis.macro.zero_words,
                    analysis.macro.title,
                    analysis.macro.literal_preview,
                    analysis.macro.odd_body_size ? 1U : 0U);
        }

        if (analysis.has_keyboard_summary) {
            fprintf(report,
                    " keyboard-slots=%lu bound=%lu empty=%lu literal=%lu/%lu macro=%lu/%lu command=%lu/%lu macro-command=%lu/%lu classes=%lu sections=%lu/%lu invalid-sections=%lu section-bytes=%lu binding-section=%lu descriptor-section=%lu descriptors=%lu descriptor-bytes=%lu first-desc=\"%s\" longest-desc=\"%s\" other=%lu trailing=%lu\n",
                    (unsigned long)analysis.keyboard.slots,
                    (unsigned long)analysis.keyboard.bound_slots,
                    (unsigned long)analysis.keyboard.empty_slots,
                    (unsigned long)analysis.keyboard.literal_refs,
                    (unsigned long)analysis.keyboard.unique_literal_refs,
                    (unsigned long)analysis.keyboard.macro_refs,
                    (unsigned long)analysis.keyboard.unique_macro_refs,
                    (unsigned long)analysis.keyboard.command_refs,
                    (unsigned long)analysis.keyboard.unique_command_refs,
                    (unsigned long)analysis.keyboard.macro_command_refs,
                    (unsigned long)analysis.keyboard.unique_macro_command_refs,
                    (unsigned long)analysis.keyboard.unique_entry_classes,
                    (unsigned long)analysis.keyboard.section_entries,
                    (unsigned long)analysis.keyboard.valid_sections,
                    (unsigned long)analysis.keyboard.invalid_sections,
                    (unsigned long)analysis.keyboard.section_payload_bytes,
                    (unsigned long)analysis.keyboard.binding_section_bytes,
                    (unsigned long)analysis.keyboard.descriptor_section_bytes,
                    (unsigned long)analysis.keyboard.descriptor_records,
                    (unsigned long)analysis.keyboard.descriptor_bytes,
                    analysis.keyboard.first_descriptor,
                    analysis.keyboard.longest_descriptor,
                    (unsigned long)analysis.keyboard.other_refs,
                    (unsigned long)analysis.keyboard.trailing_bytes);
        }

        if (analysis.has_printer_summary) {
            fprintf(report,
                    " printer-name=\"%s\" name-offset=0x%04X valid-name-offset=%u header-words=0x%04X,0x%04X,0x%04X body-words=%lu zero=%lu ffff=%lu offset-like=%lu highest-offset-like=0x%04X odd-body=%u\n",
                    analysis.printer.name,
                    (unsigned)analysis.printer.name_offset,
                    analysis.printer.name_offset_valid ? 1U : 0U,
                    (unsigned)analysis.printer.header_word0,
                    (unsigned)analysis.printer.header_word1,
                    (unsigned)analysis.printer.header_word2,
                    (unsigned long)analysis.printer.body_words,
                    (unsigned long)analysis.printer.zero_words,
                    (unsigned long)analysis.printer.ffff_words,
                    (unsigned long)analysis.printer.offset_like_words,
                    (unsigned)analysis.printer.highest_offset_like_word,
                    analysis.printer.odd_body_size ? 1U : 0U);
        }

        if (analysis.has_generic_summary && analysis.info.family != WP_RES_FAMILY_DOCUMENT_STYLE) {
            fprintf(report,
                    " resource-generic: body-words=%lu zero=%lu ffff=%lu offset-like=%lu highest-offset-like=0x%04X printable-runs=%lu printable-bytes=%lu lp-strings=%lu lp-string-bytes=%lu first-string=\"%s\" longest-string=\"%s\" odd-body=%u\n",
                    (unsigned long)analysis.generic.body_words,
                    (unsigned long)analysis.generic.zero_words,
                    (unsigned long)analysis.generic.ffff_words,
                    (unsigned long)analysis.generic.offset_like_words,
                    (unsigned)analysis.generic.highest_offset_like_word,
                    (unsigned long)analysis.generic.printable_runs,
                    (unsigned long)analysis.generic.printable_bytes,
                    (unsigned long)analysis.generic.length_prefixed_strings,
                    (unsigned long)analysis.generic.length_prefixed_string_bytes,
                    analysis.generic.first_string,
                    analysis.generic.longest_string,
                    analysis.generic.odd_body_size ? 1U : 0U);
        }
    }

    return true;
}

/*
 * Inventory `path`, descending into directories recursively. Same traversal
 * and return conventions as wp_corpus_validate_path_inner().
 */
static bool wp_corpus_inventory_path_inner(const char *path, FILE *report, WpCorpusInventoryStats *stats) {
    struct stat st;
    DIR *dir;
    struct dirent *entry;
    bool ok = true;

    if (path == 0) {
        return false;
    }
    if (stat(path, &st) != 0) {
        return false;
    }

    if (S_ISDIR(st.st_mode)) {
        dir = opendir(path);
        if (dir == 0) {
            return false;
        }
        while ((entry = readdir(dir)) != 0) {
            char *child;

            if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) {
                continue;
            }
            child = wp_corpus_join_path(path, entry->d_name);
            if (child == 0) {
                ok = false;
                continue;
            }
            if (!wp_corpus_inventory_path_inner(child, report, stats)) {
                ok = false;
            }
            free(child);
        }
        closedir(dir);
        return ok;
    }

    if (S_ISREG(st.st_mode)) {
        return wp_corpus_inventory_file(path, report, stats);
    }

    if (stats != 0) {
        stats->paths_seen++;
    }
    return true;
}

/*
 * Public entry point: inventory a file or directory tree. When `stats` is
 * non-null it is overwritten with the totals for this run. Returns false
 * for a null path or when any failure was recorded.
 */
bool wp_corpus_inventory_path(const char *path, FILE *report, WpCorpusInventoryStats *stats) {
    WpCorpusInventoryStats local_stats;
    bool ok;

    if (path == 0) {
        return false;
    }

    wp_corpus_inventory_stats_clear(&local_stats);
    ok = wp_corpus_inventory_path_inner(path, report, &local_stats);
    if (local_stats.files_failed != 0U || local_stats.load_failures != 0U) {
        ok = false;
    }

    if (stats != 0) {
        *stats = local_stats;
    }
    return ok;
}

#ifndef __WATCOMC__

/*
 * Compare two files byte by byte.
 *
 * Returns true only if both open, have identical content and length, and
 * neither stream reports a read error. *bytes_compared (if non-null)
 * receives the number of matching bytes read before the comparison stopped;
 * it is not written when either file fails to open.
 */
static bool wp_corpus_compare_files(const char *left_path, const char *right_path, size_t *bytes_compared) {
    FILE *left_file;
    FILE *right_file;
    int left;
    int right;
    size_t count = 0U;
    bool ok = true;

    if (left_path == 0 || right_path == 0) {
        return false;
    }

    left_file = fopen(left_path, "rb");
    if (left_file == 0) {
        return false;
    }
    right_file = fopen(right_path, "rb");
    if (right_file == 0) {
        fclose(left_file);
        return false;
    }

    for (;;) {
        left = fgetc(left_file);
        right = fgetc(right_file);
        if (left == EOF || right == EOF) {
            /* Equal only if both ended together and EOF was not a read error. */
            ok = (left == EOF && right == EOF
                  && ferror(left_file) == 0 && ferror(right_file) == 0);
            break;
        }
        if (left != right) {
            ok = false;
            break;
        }
        count++;
    }

    fclose(left_file);
    fclose(right_file);

    if (bytes_compared != 0) {
        *bytes_compared = count;
    }
    return ok;
}

/*
 * Build the temporary output name for a round-trip check:
 * "/tmp/wp_port_rt_" + path with every character outside [A-Za-z0-9._-]
 * replaced by '_' + ".tmp".
 *
 * Returns a malloc'd string the caller must free(), or 0 on null input or
 * allocation failure.
 *
 * NOTE(review): the name is predictable and distinct paths can map to the
 * same name (e.g. "a/b" and "a_b"); acceptable for a test harness, but not
 * a safe temporary-file scheme.
 */
static char *wp_corpus_round_trip_temp_path(const char *path) {
    const char *prefix = "/tmp/wp_port_rt_";
    const char *suffix = ".tmp";
    size_t prefix_len;
    size_t path_len;
    size_t suffix_len;
    char *out;
    size_t i;

    if (path == 0) {
        return 0;
    }

    prefix_len = strlen(prefix);
    path_len = strlen(path);
    suffix_len = strlen(suffix);

    out = (char *)malloc(prefix_len + path_len + suffix_len + 1U);
    if (out == 0) {
        return 0;
    }

    memcpy(out, prefix, prefix_len);
    for (i = 0U; i < path_len; ++i) {
        char ch = path[i];
        if ((ch >= 'A' && ch <= 'Z')
            || (ch >= 'a' && ch <= 'z')
            || (ch >= '0' && ch <= '9')
            || ch == '.' || ch == '_' || ch == '-') {
            out[prefix_len + i] = ch;
        } else {
            out[prefix_len + i] = '_';
        }
    }
    memcpy(out + prefix_len + path_len, suffix, suffix_len);
    out[prefix_len + path_len + suffix_len] = '\0';
    return out;
}

/*
 * Round-trip a single regular file: load it, confirm the preservation info
 * reports it byte-stable, save it to a temporary path, and compare the
 * result with the original.
 *
 * Files whose header cannot be read are ignored (returns true). Every
 * failure path bumps the matching counter in *stats (may be null) and
 * writes an "RTBAD" line to `report` (may be null).
 */
static bool wp_corpus_round_trip_file(const char *path, FILE *report, WpCorpusRoundTripStats *stats) {
    WpFileHeader header;
    WpLoadedFile file;
    WpFilePreservationInfo preserve;
    FILE *probe;
    char *round_trip_path;
    size_t bytes_compared;
    bool ok;

    if (stats != 0) {
        stats->paths_seen++;
    }

    if (!wp_file_read_header(path, &header)) {
        return true; /* Non-WP files are ignored by round-trip validation. */
    }
    if (stats != 0) {
        stats->wp_files_seen++;
        stats->file_type_histogram[header.file_type]++;
    }

    if (!wp_file_load_body(path, &file)) {
        if (stats != 0) {
            stats->load_failures++;
            stats->files_failed++;
        }
        if (report != 0) {
            fprintf(report, "RTBAD %s load-failed\n", path);
        }
        return false;
    }

    /*
     * Zero first: the failure report below prints validation_flags even when
     * wp_file_preservation_info() returned false and may not have written it.
     */
    memset(&preserve, 0, sizeof(preserve));
    if (!wp_file_preservation_info(&file, &preserve) || !preserve.byte_stable) {
        if (stats != 0) {
            stats->validation_failures++;
            stats->files_failed++;
        }
        if (report != 0) {
            fprintf(report, "RTBAD %s validation-flags=0x%08lX\n", path, (unsigned long)preserve.validation_flags);
        }
        wp_file_free(&file);
        return false;
    }

    if (stats != 0) {
        stats->preservation_validated++;
        stats->prefix_bytes += preserve.prefix_size;
        stats->body_bytes += preserve.body_size;
    }

    round_trip_path = wp_corpus_round_trip_temp_path(path);
    if (round_trip_path == 0) {
        wp_file_free(&file);
        if (stats != 0) {
            stats->write_failures++;
            stats->files_failed++;
        }
        if (report != 0) {
            fprintf(report, "RTBAD %s temp-name-failed\n", path);
        }
        return false;
    }

    /* Refuse to overwrite a file that already exists at the temp name. */
    probe = fopen(round_trip_path, "rb");
    if (probe != 0) {
        fclose(probe);
        free(round_trip_path);
        wp_file_free(&file);
        if (stats != 0) {
            stats->write_failures++;
            stats->files_failed++;
        }
        if (report != 0) {
            fprintf(report, "RTBAD %s temp-exists\n", path);
        }
        return false;
    }

    if (!wp_file_save(round_trip_path, &file)) {
        free(round_trip_path);
        wp_file_free(&file);
        if (stats != 0) {
            stats->write_failures++;
            stats->files_failed++;
        }
        if (report != 0) {
            fprintf(report, "RTBAD %s write-failed\n", path);
        }
        return false;
    }

    bytes_compared = 0U;
    ok = wp_corpus_compare_files(path, round_trip_path, &bytes_compared);
    remove(round_trip_path);
    free(round_trip_path);

    if (stats != 0) {
        stats->bytes_compared += bytes_compared;
        if (ok) {
            stats->files_passed++;
        } else {
            stats->compare_failures++;
            stats->files_failed++;
        }
    }

    if (report != 0) {
        fprintf(report,
                "%s %s type=0x%02X prefix=%lu body=%lu bytes=%lu hash=0x%08lX\n",
                ok ? "RTOK " : "RTBAD",
                path,
                (unsigned)file.header.file_type,
                (unsigned long)file.prefix_size,
                (unsigned long)file.logical_size,
                (unsigned long)bytes_compared,
                (unsigned long)preserve.full_hash);
    }

    wp_file_free(&file);
    return ok;
}

/*
 * Round-trip `path`, descending into directories recursively. Same traversal
 * and return conventions as wp_corpus_validate_path_inner().
 */
static bool wp_corpus_round_trip_path_inner(const char *path, FILE *report, WpCorpusRoundTripStats *stats) {
    struct stat st;
    DIR *dir;
    struct dirent *entry;
    bool ok = true;

    if (path == 0) {
        return false;
    }
    if (stat(path, &st) != 0) {
        return false;
    }

    if (S_ISDIR(st.st_mode)) {
        dir = opendir(path);
        if (dir == 0) {
            return false;
        }
        while ((entry = readdir(dir)) != 0) {
            char *child;

            if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) {
                continue;
            }
            child = wp_corpus_join_path(path, entry->d_name);
            if (child == 0) {
                ok = false;
                continue;
            }
            if (!wp_corpus_round_trip_path_inner(child, report, stats)) {
                ok = false;
            }
            free(child);
        }
        closedir(dir);
        return ok;
    }

    if (S_ISREG(st.st_mode)) {
        return wp_corpus_round_trip_file(path, report, stats);
    }

    if (stats != 0) {
        stats->paths_seen++;
    }
    return true;
}

/*
 * Public entry point: round-trip a file or directory tree. When `stats` is
 * non-null it is overwritten with the totals for this run. Returns false
 * for a null path or when any failure counter is non-zero.
 */
bool wp_corpus_round_trip_path(const char *path, FILE *report, WpCorpusRoundTripStats *stats) {
    WpCorpusRoundTripStats local_stats;
    bool ok;

    if (path == 0) {
        return false;
    }

    wp_corpus_round_trip_stats_clear(&local_stats);
    ok = wp_corpus_round_trip_path_inner(path, report, &local_stats);
    if (local_stats.files_failed != 0U
        || local_stats.load_failures != 0U
        || local_stats.write_failures != 0U
        || local_stats.validation_failures != 0U
        || local_stats.compare_failures != 0U) {
        ok = false;
    }

    if (stats != 0) {
        *stats = local_stats;
    }
    return ok;
}

#else

/*
 * Watcom build: round-trip validation is not available. Clears *stats,
 * writes an explanatory line to `report`, and always returns false.
 */
bool wp_corpus_round_trip_path(const char *path, FILE *report, WpCorpusRoundTripStats *stats) {
    (void)path;
    if (stats != 0) {
        wp_corpus_round_trip_stats_clear(stats);
    }
    if (report != 0) {
        fprintf(report, "RTBAD round-trip save validation is host-only in the DOS scaffold\n");
    }
    return false;
}

#endif
Commit message
This repository is read-only for this account.
Repository snapshot
Current branch
main
Visibility
public
Your access
Read
Remote
None
File activity
View file history