From 3cb70bb79909ae74e7d7cd5ef284168b789b2826 Mon Sep 17 00:00:00 2001 From: saint Date: Mon, 9 Sep 2024 22:15:26 +1000 Subject: [PATCH] v0.6 Further attempt to get this reverse double encoding right now --- goss-correctencoding.rb | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/goss-correctencoding.rb b/goss-correctencoding.rb index 479ec47..944ed52 100644 --- a/goss-correctencoding.rb +++ b/goss-correctencoding.rb @@ -44,26 +44,6 @@ class GossamerForumsCorrectEncoding < ImportScripts::Base corrected_content end - # Ensure the content is treated as UTF-8 (even if incorrectly encoded) - content.force_encoding('UTF-8') - - # Continue decoding until no more invalid sequences are found - previous_content = "" - while previous_content != content - previous_content = content.dup - - # Step 1: First attempt to convert from ISO-8859-1 to UTF-8 - if content.valid_encoding? - # Decode from ISO-8859-1 (or Windows-1252) to UTF-8 - content = content.force_encoding('ISO-8859-1').encode('UTF-8', invalid: :replace, undef: :replace, replace: '?') - else - content = previous_content # Stop if invalid encoding issues arise - end - end - - return content -end - # # Step 1: Try to detect encoding of the corrupted (double-encoded) content # detection = CharlockHolmes::EncodingDetector.detect(broken_content) # original_encoding = detection[:encoding]