v0.7 Further attempt to get this reverse double encoding right now
This commit is contained in:
parent
3cb70bb799
commit
736d0afe06
@ -1,7 +1,7 @@
|
|||||||
# Federated Computer, Inc.
|
# Federated Computer, Inc.
|
||||||
# David Sainty <saint@federated.computer> 2024 A.D.
|
# David Sainty <saint@federated.computer> 2024 A.D.
|
||||||
# Gossamer Threads to Discourse -- Correct Encoding
|
# Gossamer Threads to Discourse -- Correct Encoding
|
||||||
# v0.6 Further attempt to get this reverse double encoding right now
|
# v0.7 Further attempt to get this reverse double encoding right now
|
||||||
|
|
||||||
require 'mysql2'
|
require 'mysql2'
|
||||||
require 'active_record'
|
require 'active_record'
|
||||||
@ -33,8 +33,18 @@ class GossamerForumsCorrectEncoding < ImportScripts::Base
|
|||||||
# Method to detect and fix text encoding
|
# Method to detect and fix text encoding
|
||||||
def fix_text_encoding(content)
|
def fix_text_encoding(content)
|
||||||
begin
|
begin
|
||||||
# Treat as Windows-1252 (cp1252) and then decode into UTF-8
|
# # Treat as Windows-1252 (cp1252) and then decode into UTF-8
|
||||||
|
# corrected_content = content.encode('CP1252').force_encoding('UTF-8')
|
||||||
|
# Step 1: Treat content as CP1252 and convert it back to UTF-8
|
||||||
corrected_content = content.encode('CP1252').force_encoding('UTF-8')
|
corrected_content = content.encode('CP1252').force_encoding('UTF-8')
|
||||||
|
|
||||||
|
# Step 2: Check if there's still a problem (if still corrupted, apply the second pass)
|
||||||
|
if corrected_content.valid_encoding?
|
||||||
|
return corrected_content
|
||||||
|
else
|
||||||
|
# Step 3: If it's not valid UTF-8, re-encode and try to fix remaining issues
|
||||||
|
corrected_content.encode('UTF-8', invalid: :replace, undef: :replace)
|
||||||
|
end
|
||||||
rescue Encoding::UndefinedConversionError => e
|
rescue Encoding::UndefinedConversionError => e
|
||||||
puts "Error during encoding conversion: #{e.message}"
|
puts "Error during encoding conversion: #{e.message}"
|
||||||
puts e.backtrace.join("\n") # Print the full stack trace
|
puts e.backtrace.join("\n") # Print the full stack trace
|
||||||
|
Loading…
Reference in New Issue
Block a user