From 736d0afe06c5d20b0a74d1f4ec9307128d2fcae1 Mon Sep 17 00:00:00 2001 From: saint Date: Mon, 9 Sep 2024 22:29:30 +1000 Subject: [PATCH] v0.7 Further attempt to get this reverse double encoding right now --- goss-correctencoding.rb | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/goss-correctencoding.rb b/goss-correctencoding.rb index 944ed52..6206896 100644 --- a/goss-correctencoding.rb +++ b/goss-correctencoding.rb @@ -1,7 +1,7 @@ # Federated Computer, Inc. # David Sainty 2024 A.D. # Gossamer Threads to Discourse -- Correct Encoding -# v0.6 Further attempt to get this reverse dobule encoding right now +# v0.7 Further attempt to get this reverse dobule encoding right now require 'mysql2' require 'active_record' @@ -33,8 +33,18 @@ class GossamerForumsCorrectEncoding < ImportScripts::Base # Method to detect and fix text encoding def fix_text_encoding(content) begin - # Treat as Windows-1252 (cp1252) and then decode into UTF-8 +# # Treat as Windows-1252 (cp1252) and then decode into UTF-8 +# corrected_content = content.encode('CP1252').force_encoding('UTF-8') + # Step 1: Treat content as CP1252 and convert it back to UTF-8 corrected_content = content.encode('CP1252').force_encoding('UTF-8') + + # Step 2: Check if there's still a problem (if still corrupted, apply the second pass) + if corrected_content.valid_encoding? + return corrected_content + else + # Step 3: If it's not valid UTF-8, re-encode and try to fix remaining issues + corrected_content.encode('UTF-8', invalid: :replace, undef: :replace) + end rescue Encoding::UndefinedConversionError => e puts "Error during encoding conversion: #{e.message}" puts e.backtrace.join("\n") # Print the full stack trace