v0.6 Further attempt to get this reverse double encoding right now
This commit is contained in:
		@@ -44,26 +44,6 @@ class GossamerForumsCorrectEncoding < ImportScripts::Base
 | 
				
			|||||||
    corrected_content
 | 
					    corrected_content
 | 
				
			||||||
  end
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Ensure the content is treated as UTF-8 (even if incorrectly encoded)
 | 
					 | 
				
			||||||
    content.force_encoding('UTF-8')
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
    # Continue decoding until no more invalid sequences are found
 | 
					 | 
				
			||||||
    previous_content = ""
 | 
					 | 
				
			||||||
    while previous_content != content
 | 
					 | 
				
			||||||
      previous_content = content.dup
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
      # Step 1: First attempt to convert from ISO-8859-1 to UTF-8
 | 
					 | 
				
			||||||
      if content.valid_encoding?
 | 
					 | 
				
			||||||
        # Decode from ISO-8859-1 (or Windows-1252) to UTF-8
 | 
					 | 
				
			||||||
        content = content.force_encoding('ISO-8859-1').encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
 | 
					 | 
				
			||||||
      else
 | 
					 | 
				
			||||||
        content = previous_content # Stop if invalid encoding issues arise
 | 
					 | 
				
			||||||
      end
 | 
					 | 
				
			||||||
    end
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  return content
 | 
					 | 
				
			||||||
end
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
#    # Step 1: Try to detect encoding of the corrupted (double-encoded) content
 | 
					#    # Step 1: Try to detect encoding of the corrupted (double-encoded) content
 | 
				
			||||||
#    detection = CharlockHolmes::EncodingDetector.detect(broken_content)
 | 
					#    detection = CharlockHolmes::EncodingDetector.detect(broken_content)
 | 
				
			||||||
#    original_encoding = detection[:encoding]
 | 
					#    original_encoding = detection[:encoding]
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user