v0.2 - Add debugging
This commit is contained in:
		@@ -1,7 +1,7 @@
 | 
			
		||||
# Federated Computer, Inc.
 | 
			
		||||
# David Sainty <saint@federated.computer>  2024 A.D.
 | 
			
		||||
# Gossamer Threads to Discourse -- Correct Encoding
 | 
			
		||||
# v0.1 New script
 | 
			
		||||
# v0.2 Debugging
 | 
			
		||||
 | 
			
		||||
require 'mysql2'
 | 
			
		||||
require 'active_record'
 | 
			
		||||
@@ -31,20 +31,22 @@ class GossamerForumsCorrectEncoding < ImportScripts::Base
 | 
			
		||||
 | 
			
		||||
  # Method to detect and fix text encoding
 | 
			
		||||
  # Detects the encoding of +text+ and returns a UTF-8 version of it.
  #
  # Detection is delegated to CharlockHolmes::Detect.detect. Text reported as
  # ISO-8859-1 is transcoded strictly; text in any other detected encoding is
  # transcoded with invalid or undefined bytes replaced by '?'.
  #
  # The argument is never modified: the ISO-8859-1 path works on a copy, so
  # callers can still compare the result against their original string and
  # frozen strings are converted instead of raising.
  #
  # @param text [String, nil] raw text to convert
  # @return [String, nil] the UTF-8 text; +text+ itself when it is nil, when
  #   no encoding was detected, or when the conversion raised an error
  def fix_text_encoding(text)
    return text if text.nil?

    detection = CharlockHolmes::Detect.detect(text)
    # Guard against a nil detection result rather than relying on the rescue.
    original_encoding = detection && detection[:encoding]
    puts "Original encoding detected: #{original_encoding}"
    # Without a source encoding name there is nothing to transcode from.
    return text if original_encoding.nil?

    if original_encoding == 'ISO-8859-1'
      # dup first: force_encoding retags its receiver in place.
      text.dup.force_encoding('ISO-8859-1').encode('UTF-8')
    else
      # Try to convert from detected encoding to UTF-8
      text.encode('UTF-8', original_encoding, invalid: :replace, undef: :replace, replace: '?')
    end
  rescue StandardError => e
    puts "Error during encoding conversion: #{e.message}"
    puts e.backtrace.join("\n") # Print the full stack trace
    text
  end
 | 
			
		||||
 | 
			
		||||
    # Method to fix encoding issues in post content
 | 
			
		||||
@@ -52,25 +54,32 @@ class GossamerForumsCorrectEncoding < ImportScripts::Base
 | 
			
		||||
    offset = 0
 | 
			
		||||
 | 
			
		||||
    loop do
 | 
			
		||||
      posts = Post.limit(@batch_size).offset(offset)
 | 
			
		||||
      break if posts.empty?
 | 
			
		||||
      puts "OFFSET:  #{offset}"
 | 
			
		||||
      begin
 | 
			
		||||
        posts = Post.limit(@batch_size).offset(offset)
 | 
			
		||||
        break if posts.empty?
 | 
			
		||||
 | 
			
		||||
      posts.each do |post|
 | 
			
		||||
        raw_content = post.raw
 | 
			
		||||
        fixed_content = fix_text_encoding(raw_content)
 | 
			
		||||
        if fixed_content != raw_content
 | 
			
		||||
          puts "Updating post ##{post.id}"
 | 
			
		||||
          puts "------- raw_content:\n#{raw_content}"
 | 
			
		||||
          puts "+++++++ fixed_content:\n#{fixed_content}"
 | 
			
		||||
          puts "---------------------------------------------------------------------------------------------"
 | 
			
		||||
          # post.update(raw: fixed_content)
 | 
			
		||||
#          post.raw = fixed_content
 | 
			
		||||
#          if post.save
 | 
			
		||||
#            puts "Post ##{post.id} updated successfully."
 | 
			
		||||
#          else
 | 
			
		||||
#            puts "Failed to update Post ##{post.id}: #{post.errors.full_messages.join(', ')}"
 | 
			
		||||
#          end
 | 
			
		||||
        posts.each do |post|
 | 
			
		||||
          raw_content = post.raw
 | 
			
		||||
          puts "--> NEXT POST:   post.id:  #{post.id}"
 | 
			
		||||
          fixed_content = fix_text_encoding(raw_content)
 | 
			
		||||
          if fixed_content != raw_content
 | 
			
		||||
            puts "Updating post #{post.id}"
 | 
			
		||||
            puts "------- raw_content:\n#{raw_content}"
 | 
			
		||||
            puts "+++++++ fixed_content:\n#{fixed_content}"
 | 
			
		||||
            puts "---------------------------------------------------------------------------------------------"
 | 
			
		||||
            # post.update(raw: fixed_content)
 | 
			
		||||
#            post.raw = fixed_content
 | 
			
		||||
#            if post.save
 | 
			
		||||
#              puts "Post ##{post.id} updated successfully."
 | 
			
		||||
#            else
 | 
			
		||||
#              puts "Failed to update Post ##{post.id}: #{post.errors.full_messages.join(', ')}"
 | 
			
		||||
#            end
 | 
			
		||||
          end
 | 
			
		||||
        end
 | 
			
		||||
      rescue
 | 
			
		||||
        puts "Error:  #{e.message}"
 | 
			
		||||
        puts e.backtrace.join("\n")  # Print the full stack trace
 | 
			
		||||
      end
 | 
			
		||||
 | 
			
		||||
      offset += @batch_size
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user