Word Unperfect
public
Read
Owner: themaster
Branch: main
Commits: 0
Git CLI clone URL
git clone https://www.xt-emporium.com/git/word-unperfect.git
Fullscreen desktop URL
Code
Commits
History
Branches
Bug Reports
Discussions
Compare
Settings
word-unperfect
/
rev
/
wp_document_analyzer.c
File editor
#include "wp_document_analyzer.h"
#include "wp_control_codes.h"
#include "wp_fixed_codes.h"
#include "wp_record_parser.h"
#include "wp_variable_codes.h"
#include "wp_nested_stream.h"

#include <string.h>

/*
 * Accumulates one counter: dst->field += src->field.
 * Only usable for members that have the same name on both structures.
 */
#define WP_DOCUMENT_STATS_ADD(dst, src, field) ((dst)->field += (src)->field)

/* Zero-fills *stats. A NULL pointer is ignored. */
void wp_document_stats_clear(WpDocumentStats *stats)
{
    if (stats == NULL) {
        return;
    }
    memset(stats, 0, sizeof(*stats));
}

/*
 * Returns a label for a function code by delegating on its numeric range:
 *   code >= 0xD0          -> wp_variable_code_name(code, sub_code)
 *   0xC0 <= code < 0xD0   -> wp_fixed_code_name(code)
 *   code <  0xC0          -> wp_control_code_label(code)
 * sub_code is only consulted for the first range.
 */
const char *wp_document_code_label(uint8_t code, uint8_t sub_code)
{
    if (code >= 0xD0U) {
        return wp_variable_code_name(code, sub_code);
    }
    if (code >= 0xC0U) {
        return wp_fixed_code_name(code);
    }
    return wp_control_code_label(code);
}

/*
 * Runs a single record through a scratch WpControlSummaryStats and folds the
 * resulting counters into *stats. Nothing is added when
 * wp_control_summary_add_record() reports failure.
 */
static void wp_document_stats_add_control_info(WpDocumentStats *stats, const WpRecord *rec)
{
    WpControlSummaryStats summary;

    if (stats == NULL || rec == NULL) {
        return;
    }

    wp_control_summary_stats_clear(&summary);
    if (!wp_control_summary_add_record(&summary, rec)) {
        return;
    }

    WP_DOCUMENT_STATS_ADD(stats, &summary, ascii_control_records);
    WP_DOCUMENT_STATS_ADD(stats, &summary, printable_text_records);
    WP_DOCUMENT_STATS_ADD(stats, &summary, single_byte_format_records);
    WP_DOCUMENT_STATS_ADD(stats, &summary, packet_records);
    WP_DOCUMENT_STATS_ADD(stats, &summary, line_break_records);
    WP_DOCUMENT_STATS_ADD(stats, &summary, page_break_records);
    WP_DOCUMENT_STATS_ADD(stats, &summary, hard_returns);
    WP_DOCUMENT_STATS_ADD(stats, &summary, soft_returns);
    WP_DOCUMENT_STATS_ADD(stats, &summary, hard_pages);
    WP_DOCUMENT_STATS_ADD(stats, &summary, soft_pages);
    WP_DOCUMENT_STATS_ADD(stats, &summary, tabs);
    WP_DOCUMENT_STATS_ADD(stats, &summary, indents);
    WP_DOCUMENT_STATS_ADD(stats, &summary, dormant_returns);
    WP_DOCUMENT_STATS_ADD(stats, &summary, dormant_pages);
    WP_DOCUMENT_STATS_ADD(stats, &summary, normalized_spaces);
    WP_DOCUMENT_STATS_ADD(stats, &summary, normalized_hyphens);
    WP_DOCUMENT_STATS_ADD(stats, &summary, whitespace_gate_records);
    WP_DOCUMENT_STATS_ADD(stats, &summary, render_dirty_gate_records);
    WP_DOCUMENT_STATS_ADD(stats, &summary, dirty_exempt_records);
    WP_DOCUMENT_STATS_ADD(stats, &summary, dirty_neutral_records);
    WP_DOCUMENT_STATS_ADD(stats, &summary, extension_scan_stop_records);
    WP_DOCUMENT_STATS_ADD(stats, &summary, tsm_highlight_records);
    WP_DOCUMENT_STATS_ADD(stats, &summary, hyphenation_suppression_records);
    WP_DOCUMENT_STATS_ADD(stats, &summary, hidden_function_gate_records);
    WP_DOCUMENT_STATS_ADD(stats, &summary, unknown_single_byte_records);
    WP_DOCUMENT_STATS_ADD(stats, &summary, display_columns);
}

/*
 * Updates the fixed-length packet counters for one record. Records whose type
 * is not WP_CODE_FIXED_LENGTH are ignored. A code without a
 * wp_fixed_code_info() entry only bumps fixed_unknown_packets.
 */
static void wp_document_stats_add_fixed_info(WpDocumentStats *stats, const WpRecord *rec)
{
    const WpFixedCodeInfo *descriptor;

    if (stats == NULL || rec == NULL) {
        return;
    }
    if (rec->type != WP_CODE_FIXED_LENGTH) {
        return;
    }

    descriptor = wp_fixed_code_info(rec->code);
    if (descriptor == NULL) {
        stats->fixed_unknown_packets++;
        return;
    }

    stats->fixed_known_packets++;
    if (descriptor->mirrored_trailer) {
        stats->fixed_mirrored_packets++;
    }
    if (descriptor->payload_length == 0U) {
        stats->fixed_zero_payload_packets++;
    }

    /* Three specific codes get their own dedicated counters. */
    switch (rec->code) {
    case 0xC0U:
        stats->extended_character_packets++;
        break;
    case 0xC3U:
        stats->attribute_begin_packets++;
        break;
    case 0xC4U:
        stats->attribute_end_packets++;
        break;
    default:
        break;
    }
}

/*
 * Folds one initial format table into the d0_* counters. Tables that are
 * absent (is_present false) contribute nothing.
 */
static void wp_document_stats_add_format_table(WpDocumentStats *stats, const WpFormatTableInfo *table)
{
    if (stats == NULL || table == NULL) {
        return;
    }
    if (!table->is_present) {
        return;
    }

    stats->d0_initial_format_tables++;
    stats->d0_format_table_words += table->word_count;
    stats->d0_format_table_groups += table->group_count;
    stats->d0_format_table_sentinels += table->sentinel_word_count;
    stats->d0_format_table_nonmonotonic_steps += table->nonmonotonic_step_count;

    /* The first step is only recorded while the stored value is still zero. */
    if (table->has_positive_step && stats->d0_format_table_first_step == 0U) {
        stats->d0_format_table_first_step = table->first_positive_step;
    }

    /* Running maximum across every table seen. */
    if (table->has_values && table->max_value > stats->d0_format_table_max_value) {
        stats->d0_format_table_max_value = table->max_value;
    }
}

/*
 * Merges the result of a nested-stream analysis into the document totals.
 *
 * nested->records_seen and nested->bytes_consumed are each added twice on
 * purpose of the original code: once to the overall records_seen /
 * bytes_consumed totals and once to the nested_stream_* totals.
 */
static void wp_document_stats_merge_nested(WpDocumentStats *stats, const WpNestedStreamStats *nested)
{
    if (stats == NULL || nested == NULL) {
        return;
    }

    /* Record-level totals. */
    WP_DOCUMENT_STATS_ADD(stats, nested, records_seen);
    WP_DOCUMENT_STATS_ADD(stats, nested, bytes_consumed);
    WP_DOCUMENT_STATS_ADD(stats, nested, char_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, single_byte_codes);
    WP_DOCUMENT_STATS_ADD(stats, nested, fixed_length_codes);
    WP_DOCUMENT_STATS_ADD(stats, nested, variable_length_codes);
    WP_DOCUMENT_STATS_ADD(stats, nested, incomplete_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, mismatched_trailers);

    /* Control-summary counters. */
    WP_DOCUMENT_STATS_ADD(stats, nested, ascii_control_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, printable_text_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, single_byte_format_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, packet_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, line_break_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, page_break_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, hard_returns);
    WP_DOCUMENT_STATS_ADD(stats, nested, soft_returns);
    WP_DOCUMENT_STATS_ADD(stats, nested, hard_pages);
    WP_DOCUMENT_STATS_ADD(stats, nested, soft_pages);
    WP_DOCUMENT_STATS_ADD(stats, nested, tabs);
    WP_DOCUMENT_STATS_ADD(stats, nested, indents);
    WP_DOCUMENT_STATS_ADD(stats, nested, dormant_returns);
    WP_DOCUMENT_STATS_ADD(stats, nested, dormant_pages);
    WP_DOCUMENT_STATS_ADD(stats, nested, normalized_spaces);
    WP_DOCUMENT_STATS_ADD(stats, nested, normalized_hyphens);
    WP_DOCUMENT_STATS_ADD(stats, nested, whitespace_gate_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, render_dirty_gate_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, dirty_exempt_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, dirty_neutral_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, extension_scan_stop_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, tsm_highlight_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, hyphenation_suppression_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, hidden_function_gate_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, unknown_single_byte_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, display_columns);

    /* Fixed-length packet counters. */
    WP_DOCUMENT_STATS_ADD(stats, nested, fixed_known_packets);
    WP_DOCUMENT_STATS_ADD(stats, nested, fixed_unknown_packets);
    WP_DOCUMENT_STATS_ADD(stats, nested, fixed_mirrored_packets);
    WP_DOCUMENT_STATS_ADD(stats, nested, fixed_zero_payload_packets);
    WP_DOCUMENT_STATS_ADD(stats, nested, extended_character_packets);
    WP_DOCUMENT_STATS_ADD(stats, nested, attribute_begin_packets);
    WP_DOCUMENT_STATS_ADD(stats, nested, attribute_end_packets);

    /* 0xD4 counters. */
    WP_DOCUMENT_STATS_ADD(stats, nested, d4_layout_state_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, d4_layout_state_payload_bytes);
    WP_DOCUMENT_STATS_ADD(stats, nested, d4_pending_span_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, d4_line_window_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, d4_line_window_extension_bytes);
    WP_DOCUMENT_STATS_ADD(stats, nested, d4_line_metric_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, d4_line_metric_extension_bytes);
    WP_DOCUMENT_STATS_ADD(stats, nested, d4_extension_fragment_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, d4_extension_fragment_bytes);
    WP_DOCUMENT_STATS_ADD(stats, nested, d4_line_build_checkpoint_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, d4_word_pair_checkpoint_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, d4_position_marker_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, d4_control_word_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, d4_compact_metric_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, d4_layout_anchor_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, d4_unknown_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, d4_unknown_payload_bytes);
    WP_DOCUMENT_STATS_ADD(stats, nested, d4_trailing_unparsed_bytes);
    WP_DOCUMENT_STATS_ADD(stats, nested, d4_extension_block_records);
    WP_DOCUMENT_STATS_ADD(stats, nested, d4_extension_block_bytes);

    /* Variable-length packet classification counters. */
    WP_DOCUMENT_STATS_ADD(stats, nested, variable_structural_packets);
    WP_DOCUMENT_STATS_ADD(stats, nested, variable_generic_packets);
    WP_DOCUMENT_STATS_ADD(stats, nested, d1_definition_packets);
    WP_DOCUMENT_STATS_ADD(stats, nested, d2_outline_packets);
    WP_DOCUMENT_STATS_ADD(stats, nested, d3_generated_text_packets);
    WP_DOCUMENT_STATS_ADD(stats, nested, repeat_group_packets);
    WP_DOCUMENT_STATS_ADD(stats, nested, delayed_text_packets);
    WP_DOCUMENT_STATS_ADD(stats, nested, box_object_packets);
    WP_DOCUMENT_STATS_ADD(stats, nested, table_layout_packets);
    WP_DOCUMENT_STATS_ADD(stats, nested, system_command_packets);

    /* Nested-stream bookkeeping; member names differ between the structs. */
    stats->nested_stream_hints += nested->variable_nested_stream_hints;
    stats->nested_streams_analyzed += nested->streams_seen;
    stats->nested_stream_records += nested->records_seen;
    stats->nested_stream_bytes += nested->bytes_consumed;
    if ((size_t)nested->max_depth_seen > stats->nested_stream_max_depth) {
        stats->nested_stream_max_depth = nested->max_depth_seen;
    }
    if (nested->stopped_on_parse_gap) {
        stats->nested_stream_parse_gaps++;
    }
    if (nested->recursion_limit_hit) {
        stats->nested_stream_recursion_limits++;
    }

    /* Post-compare and scanner counters. */
    WP_DOCUMENT_STATS_ADD(stats, nested, variable_repeat_dispatch_packets);
    WP_DOCUMENT_STATS_ADD(stats, nested, variable_repeat_total);
    WP_DOCUMENT_STATS_ADD(stats, nested, variable_extension_scan_packets);
    WP_DOCUMENT_STATS_ADD(stats, nested, variable_refcount_increment_packets);
    WP_DOCUMENT_STATS_ADD(stats, nested, variable_refcount_decrement_packets);
    WP_DOCUMENT_STATS_ADD(stats, nested, scanner_bypass_packets);
}

/*
 * Accounts for the leftovers of a 0xD4 record: trailing unparsed bytes,
 * extension block lists, and records for which the classifier set none of the
 * 0xD4 decode flags (counted as d4_unknown_*). Records with any other code are
 * ignored.
 */
static void wp_document_stats_add_d4_residual_info(WpDocumentStats *stats, const WpRecord *rec,
                                                   const WpVariableCommandInfo *info)
{
    bool recognized;

    if (stats == NULL || rec == NULL || info == NULL) {
        return;
    }
    if (rec->code != 0xD4U) {
        return;
    }

    if (info->has_layout_state) {
        stats->d4_trailing_unparsed_bytes += info->layout_state.trailing_unparsed_bytes;
        if (info->layout_state.has_extension_blocks) {
            stats->d4_extension_block_records++;
            stats->d4_extension_block_bytes += info->layout_state.extension_block_bytes;
        }
    }

    if (info->has_line_window) {
        stats->d4_trailing_unparsed_bytes += info->line_window.trailing_unparsed_bytes;
    }

    /* A line metric only contributes when it carries an extension payload. */
    if (info->has_line_metric && info->line_metric.has_extension_payload) {
        stats->d4_trailing_unparsed_bytes += info->line_metric.extension.trailing_unparsed_bytes;
        if (info->line_metric.extension.is_block_list) {
            stats->d4_extension_block_records++;
            stats->d4_extension_block_bytes += info->line_metric.extension.block_bytes;
        }
    }

    if (info->has_extension_fragment) {
        stats->d4_trailing_unparsed_bytes += info->extension_fragment.trailing_unparsed_bytes;
        if (info->extension_fragment.is_block_list) {
            stats->d4_extension_block_records++;
            stats->d4_extension_block_bytes += info->extension_fragment.block_bytes;
        }
    }

    recognized = info->has_layout_state
              || info->has_pending_span
              || info->has_line_window
              || info->has_line_metric
              || info->has_extension_fragment
              || info->has_line_build_checkpoint
              || info->has_word_pair_checkpoint
              || info->has_position_marker
              || info->has_control_word
              || info->has_compact_metric
              || info->has_layout_anchor;

    if (!recognized) {
        stats->d4_unknown_records++;
        stats->d4_unknown_payload_bytes += rec->data_length;
    }
}

/*
 * Reports whether the classifier attached any decoded meaning to a
 * variable-length packet. A post-compare plan only counts when its flags word
 * is non-zero. Returns false for a NULL info.
 */
static bool wp_document_variable_has_decoded_semantics(const WpVariableCommandInfo *info)
{
    bool has_table_or_d4_shape;
    bool has_typed_payload;
    bool has_scanner_directive;

    if (info == NULL) {
        return false;
    }

    has_table_or_d4_shape = info->has_initial_format_table
                         || info->has_layout_state
                         || info->has_pending_span
                         || info->has_line_window
                         || info->has_line_metric
                         || info->has_extension_fragment
                         || info->has_line_build_checkpoint
                         || info->has_word_pair_checkpoint
                         || info->has_position_marker
                         || info->has_control_word
                         || info->has_compact_metric
                         || info->has_layout_anchor;

    has_typed_payload = info->has_definition_payload
                     || info->has_outline_payload
                     || info->has_generated_text_payload
                     || info->has_repeat_group_payload
                     || info->has_delayed_text_payload
                     || info->has_box_object_payload
                     || info->has_table_layout_payload
                     || info->has_system_command_payload;

    has_scanner_directive = info->scanner_bypass
                         || info->has_nested_stream_hint
                         || (info->has_post_compare_plan && info->post_compare.flags != 0U);

    return has_table_or_d4_shape || has_typed_payload || has_scanner_directive;
}

/* Per-shape 0xD4 record counters. Caller guarantees non-NULL arguments. */
static void wp_document_stats_count_d4_shapes(WpDocumentStats *stats, const WpVariableCommandInfo *info)
{
    if (info->has_layout_state) {
        stats->d4_layout_state_records++;
        stats->d4_layout_state_payload_bytes += info->layout_state.decoded_payload_bytes;
    }
    if (info->has_pending_span) {
        stats->d4_pending_span_records++;
    }
    if (info->has_line_window) {
        stats->d4_line_window_records++;
        if (info->line_window.has_extension_payload) {
            stats->d4_line_window_extension_bytes += info->line_window.extension_payload_bytes;
        }
    }
    if (info->has_line_metric) {
        stats->d4_line_metric_records++;
        if (info->line_metric.has_extension_payload) {
            stats->d4_line_metric_extension_bytes += info->line_metric.extension.extension_payload_bytes;
        }
    }
    if (info->has_extension_fragment) {
        stats->d4_extension_fragment_records++;
        stats->d4_extension_fragment_bytes += info->extension_fragment.extension_payload_bytes;
    }
    if (info->has_line_build_checkpoint) {
        stats->d4_line_build_checkpoint_records++;
    }
    if (info->has_word_pair_checkpoint) {
        stats->d4_word_pair_checkpoint_records++;
    }
    if (info->has_position_marker) {
        stats->d4_position_marker_records++;
    }
    if (info->has_control_word) {
        stats->d4_control_word_records++;
    }
    if (info->has_compact_metric) {
        stats->d4_compact_metric_records++;
    }
    if (info->has_layout_anchor) {
        stats->d4_layout_anchor_records++;
    }
}

/* Typed-payload packet counters. Caller guarantees non-NULL arguments. */
static void wp_document_stats_count_payload_kinds(WpDocumentStats *stats, const WpVariableCommandInfo *info)
{
    if (info->has_definition_payload) {
        stats->d1_definition_packets++;
    }
    if (info->has_outline_payload) {
        stats->d2_outline_packets++;
    }
    if (info->has_generated_text_payload) {
        stats->d3_generated_text_packets++;
    }
    if (info->has_repeat_group_payload) {
        stats->repeat_group_packets++;
    }
    if (info->has_delayed_text_payload) {
        stats->delayed_text_packets++;
    }
    if (info->has_box_object_payload) {
        stats->box_object_packets++;
    }
    if (info->has_table_layout_payload) {
        stats->table_layout_packets++;
    }
    if (info->has_system_command_payload) {
        stats->system_command_packets++;
    }
}

/*
 * Counters derived from the post-compare plan; no-op when the packet has no
 * plan. Caller guarantees non-NULL arguments.
 */
static void wp_document_stats_count_post_compare(WpDocumentStats *stats, const WpVariableCommandInfo *info)
{
    if (!info->has_post_compare_plan) {
        return;
    }

    if ((info->post_compare.flags & WP_VARIABLE_POST_REPEAT_DISPATCH) != 0U) {
        stats->variable_repeat_dispatch_packets++;
        if (info->post_compare.has_repeat_count) {
            stats->variable_repeat_total += info->post_compare.repeat_count;
        }
    }
    if (info->post_compare.extension_scan) {
        stats->variable_extension_scan_packets++;
    }
    if (info->post_compare.refcount_increment) {
        stats->variable_refcount_increment_packets++;
    }
    if (info->post_compare.refcount_decrement) {
        stats->variable_refcount_decrement_packets++;
    }
}

/*
 * When the classifier flagged a nested stream, counts the hint and analyzes
 * the record's nested stream with default options. The nested totals are
 * merged only if wp_nested_stream_analyze_record() succeeds. Caller guarantees
 * non-NULL arguments.
 */
static void wp_document_stats_follow_nested_hint(WpDocumentStats *stats, const WpRecord *rec,
                                                 const WpVariableCommandInfo *info)
{
    WpNestedStreamOptions options;
    WpNestedStreamStats nested_totals;

    if (!info->has_nested_stream_hint) {
        return;
    }

    stats->nested_stream_hints++;

    wp_nested_stream_default_options(&options);
    if (wp_nested_stream_analyze_record(rec, &options, &nested_totals)) {
        wp_document_stats_merge_nested(stats, &nested_totals);
    }
}

/*
 * Folds everything the classifier learned about one variable-length record
 * into *stats: the action histogram, format-table and 0xD4 counters, typed
 * payload counters, the structural/generic split, post-compare counters and,
 * if hinted, the nested stream contained in the record.
 */
static void wp_document_stats_add_variable_info(WpDocumentStats *stats, const WpRecord *rec,
                                                const WpVariableCommandInfo *info)
{
    unsigned action_index;

    if (stats == NULL || rec == NULL || info == NULL) {
        return;
    }

    /* Out-of-range action values are left out of the histogram. */
    action_index = (unsigned)info->action;
    if (action_index < (unsigned)WP_VARIABLE_ACTION_COUNT) {
        stats->variable_action_histogram[action_index]++;
    }

    if (info->has_initial_format_table) {
        wp_document_stats_add_format_table(stats, &info->initial_table);
    }

    wp_document_stats_count_d4_shapes(stats, info);
    wp_document_stats_add_d4_residual_info(stats, rec, info);
    wp_document_stats_count_payload_kinds(stats, info);

    if (wp_document_variable_has_decoded_semantics(info)) {
        stats->variable_structural_packets++;
    } else {
        stats->variable_generic_packets++;
    }

    if (info->scanner_bypass) {
        stats->scanner_bypass_packets++;
    }

    wp_document_stats_count_post_compare(stats, info);
    wp_document_stats_follow_nested_hint(stats, rec, info);
}

/*
 * Shared completeness bookkeeping for fixed- and variable-length packets.
 * With a trailer present, only a mismatch is counted; without one, the record
 * is counted as incomplete when is_complete is false. Caller guarantees
 * non-NULL arguments.
 */
static void wp_document_stats_count_packet_integrity(WpDocumentStats *stats, const WpRecord *rec)
{
    if (rec->trailer_present) {
        if (!rec->trailer_matches) {
            stats->mismatched_trailers++;
        }
    } else if (!rec->is_complete) {
        stats->incomplete_records++;
    }
}

/*
 * Accounts for one top-level record: overall and top-level totals, the code
 * histogram, control-summary counters, size maxima, and the per-type counters
 * selected by rec->type.
 */
static void wp_document_stats_add_record(WpDocumentStats *stats, const WpRecord *rec)
{
    if (stats == NULL || rec == NULL) {
        return;
    }

    stats->records_seen++;
    stats->bytes_consumed += rec->length;
    stats->top_level_records_seen++;
    stats->top_level_bytes_consumed += rec->length;
    stats->payload_bytes += rec->data_length;
    stats->code_histogram[rec->code]++;

    wp_document_stats_add_control_info(stats, rec);

    if (rec->length > stats->max_record_length) {
        stats->max_record_length = rec->length;
    }

    /* Remember which code/sub-code declared the largest payload so far. */
    if (rec->declared_length > stats->max_declared_payload) {
        stats->max_declared_payload = rec->declared_length;
        stats->max_declared_code = rec->code;
        stats->max_declared_sub_code = rec->sub_code;
    }

    switch (rec->type) {
    case WP_CODE_CHAR:
        stats->char_records++;
        break;

    case WP_CODE_SINGLE_BYTE:
        stats->single_byte_codes++;
        break;

    case WP_CODE_FIXED_LENGTH:
        stats->fixed_length_codes++;
        wp_document_stats_add_fixed_info(stats, rec);
        wp_document_stats_count_packet_integrity(stats, rec);
        break;

    case WP_CODE_VARIABLE_LENGTH: {
        WpVariableCommandInfo classified;

        stats->variable_length_codes++;
        stats->variable_subcode_histogram[rec->sub_code]++;
        wp_document_stats_count_packet_integrity(stats, rec);

        if (wp_variable_classify_record(rec, &classified)) {
            wp_document_stats_add_variable_info(stats, rec, &classified);
        }
        break;
    }

    default:
        /* Any other record type only contributes to the generic totals. */
        break;
    }
}

/*
 * Clears *stats, then consumes records from a by-value copy of *wl until the
 * copy reports no used bytes or the parser yields a zero-length record.
 *
 * The WpLayoutGlobals structure behind wl is not written by this function; the
 * copy is a plain struct assignment, so anything it points at is shared.
 *
 * Returns false only when wl or stats is NULL, true otherwise.
 */
bool wp_document_analyze_stream(WpLayoutGlobals *wl, WpDocumentStats *stats)
{
    WpLayoutGlobals cursor;

    if (wl == NULL || stats == NULL) {
        return false;
    }

    wp_document_stats_clear(stats);
    cursor = *wl;

    while (cursor.record_used_bytes > 0) {
        WpRecord rec;
        bool produced;

        wp_parser_consume_record(&cursor, &rec);

        /* Sample the length before the record is released. */
        produced = (rec.length != 0);
        if (produced) {
            wp_document_stats_add_record(stats, &rec);
        }
        wp_record_free(&rec);

        /* A zero-length record ends the walk. */
        if (!produced) {
            break;
        }
    }

    return true;
}

/*
 * Binds the primary stream of an already loaded file to a zeroed
 * WpLayoutGlobals and analyzes it.
 *
 * Returns false when an argument is NULL or the bind fails; otherwise returns
 * the result of wp_document_analyze_stream().
 */
bool wp_document_analyze_loaded_file(WpLoadedFile *file, WpDocumentStats *stats)
{
    WpLayoutGlobals wl;

    if (file == NULL || stats == NULL) {
        return false;
    }

    memset(&wl, 0, sizeof(wl));

    /* NOTE(review): meaning of the 4096U argument is defined by
     * wp_file_bind_primary_stream() -- confirm against its declaration. */
    if (!wp_file_bind_primary_stream(file, &wl, 4096U)) {
        return false;
    }

    return wp_document_analyze_stream(&wl, stats);
}

/*
 * Loads the file body named by filename, analyzes it and frees the loaded
 * file again.
 *
 * Returns false when an argument is NULL or wp_file_load_body() fails;
 * otherwise returns the analysis result.
 */
bool wp_document_analyze_file(const char *filename, WpDocumentStats *stats)
{
    WpLoadedFile file;
    bool ok;

    if (filename == NULL || stats == NULL) {
        return false;
    }
    if (!wp_file_load_body(filename, &file)) {
        return false;
    }

    ok = wp_document_analyze_loaded_file(&file, stats);
    wp_file_free(&file);

    return ok;
}
Commit message
This repository is read-only for this account.
Repository snapshot
Current branch
main
Visibility
public
Your access
Read
Remote
None
File activity
View file history