diff --git a/goss-correctencoding.rb b/goss-correctencoding.rb index 944ed52..6206896 100644 --- a/goss-correctencoding.rb +++ b/goss-correctencoding.rb @@ -1,7 +1,7 @@ # Federated Computer, Inc. # David Sainty 2024 A.D. # Gossamer Threads to Discourse -- Correct Encoding -# v0.6 Further attempt to get this reverse dobule encoding right now +# v0.7 Further attempt to get this reverse double encoding right now require 'mysql2' require 'active_record' @@ -33,8 +33,18 @@ class GossamerForumsCorrectEncoding < ImportScripts::Base # Method to detect and fix text encoding def fix_text_encoding(content) begin - # Treat as Windows-1252 (cp1252) and then decode into UTF-8 +# # Treat as Windows-1252 (cp1252) and then decode into UTF-8 +# corrected_content = content.encode('CP1252').force_encoding('UTF-8') + # Step 1: Treat content as CP1252 and convert it back to UTF-8 corrected_content = content.encode('CP1252').force_encoding('UTF-8') + + # Step 2: Check if there's still a problem (if still corrupted, apply the second pass) + if corrected_content.valid_encoding? + return corrected_content + else + # Step 3: If it's not valid UTF-8, re-encode and try to fix remaining issues + corrected_content.encode('UTF-8', invalid: :replace, undef: :replace) + end rescue Encoding::UndefinedConversionError => e puts "Error during encoding conversion: #{e.message}" puts e.backtrace.join("\n") # Print the full stack trace