v0.2 - Add debugging

This commit is contained in:
David Sainty 2024-09-09 16:29:14 +10:00
parent 85c93af281
commit 538185095d

View File

@@ -1,7 +1,7 @@
# Federated Computer, Inc. # Federated Computer, Inc.
# David Sainty <saint@federated.computer> 2024 A.D. # David Sainty <saint@federated.computer> 2024 A.D.
# Gossamer Threads to Discourse -- Correct Encoding # Gossamer Threads to Discourse -- Correct Encoding
# v0.1 New script # v0.2 Debugging
require 'mysql2' require 'mysql2'
require 'active_record' require 'active_record'
@@ -31,20 +31,22 @@ class GossamerForumsCorrectEncoding < ImportScripts::Base
# Method to detect and fix text encoding # Method to detect and fix text encoding
def fix_text_encoding(text) def fix_text_encoding(text)
# Detect encoding begin
detection = CharlockHolmes::Detect.detect(text) # Detect encoding
original_encoding = detection[:encoding] detection = CharlockHolmes::Detect.detect(text)
puts "Original encoding detected: #{original_encoding}" original_encoding = detection[:encoding]
puts "Original encoding detected: #{original_encoding}"
if original_encoding == 'ISO-8859-1'
text.force_encoding('ISO-8859-1').encode('UTF-8') if original_encoding == 'ISO-8859-1'
else text.force_encoding('ISO-8859-1').encode('UTF-8')
# Try to convert from detected encoding to UTF-8 else
text.encode('UTF-8', original_encoding, invalid: :replace, undef: :replace, replace: '?') # Try to convert from detected encoding to UTF-8
end text.encode('UTF-8', original_encoding, invalid: :replace, undef: :replace, replace: '?')
rescue StandardError => e end
puts "Error during encoding conversion: #{e.message}" rescue StandardError => e
text puts "Error during encoding conversion: #{e.message}"
puts e.backtrace.join("\n") # Print the full stack trace
text
end end
# Method to fix encoding issues in post content # Method to fix encoding issues in post content
@@ -52,25 +54,32 @@ class GossamerForumsCorrectEncoding < ImportScripts::Base
offset = 0 offset = 0
loop do loop do
posts = Post.limit(@batch_size).offset(offset) puts "OFFSET: #{offset}"
break if posts.empty? begin
posts = Post.limit(@batch_size).offset(offset)
break if posts.empty?
posts.each do |post| posts.each do |post|
raw_content = post.raw raw_content = post.raw
fixed_content = fix_text_encoding(raw_content) puts "--> NEXT POST: post.id: #{post.id}"
if fixed_content != raw_content fixed_content = fix_text_encoding(raw_content)
puts "Updating post ##{post.id}" if fixed_content != raw_content
puts "------- raw_content:\n#{raw_content}" puts "Updating post #{post.id}"
puts "+++++++ fixed_content:\n#{fixed_content}" puts "------- raw_content:\n#{raw_content}"
puts "---------------------------------------------------------------------------------------------" puts "+++++++ fixed_content:\n#{fixed_content}"
# post.update(raw: fixed_content) puts "---------------------------------------------------------------------------------------------"
# post.raw = fixed_content # post.update(raw: fixed_content)
# if post.save # post.raw = fixed_content
# puts "Post ##{post.id} updated successfully." # if post.save
# else # puts "Post ##{post.id} updated successfully."
# puts "Failed to update Post ##{post.id}: #{post.errors.full_messages.join(', ')}" # else
# end # puts "Failed to update Post ##{post.id}: #{post.errors.full_messages.join(', ')}"
# end
end
end end
rescue
puts "Error: #{e.message}"
puts e.backtrace.join("\n") # Print the full stack trace
end end
offset += @batch_size offset += @batch_size