From e9e5e46b4360a6475c8548b8cf7f665497b341eb Mon Sep 17 00:00:00 2001
From: saint
Date: Fri, 16 Aug 2024 02:49:02 +1000
Subject: [PATCH] goss-cleanup.rb and goss-delalluser-20240527.rb fixes

---
 goss-cleanup.rb             |  16 +-
 goss-delalluser-20240527.rb |   4 +-
 gosss.rb                    | 600 ++++++++++++++++++++++++++++++++++++
 3 files changed, 612 insertions(+), 8 deletions(-)
 create mode 100644 gosss.rb

diff --git a/goss-cleanup.rb b/goss-cleanup.rb
index 2a2614d..df0b914 100644
--- a/goss-cleanup.rb
+++ b/goss-cleanup.rb
@@ -1,5 +1,9 @@
-# v0.13
-require File.expand_path("../../../config/environment", __FILE__)
+# Federated Computer, Inc.
+# David Sainty 2024 A.D.
+# Gossamer Threads to Discourse -- CleanUp Script
+# v0.14 Fix for Prod-Bitnami. Prep for 20240816 run.
+
+require File.expand_path("../../../../config/environment", __FILE__)
 
 class GossamerForumsCleaner
   def cleanup_users
@@ -73,10 +77,10 @@ class GossamerForumsCleaner
   def perform_cleanup
     puts "Cleanup beginning!"
 #    cleanup_messages
-    cleanup_posts
-    cleanup_topics
-    cleanup_categories
-#    cleanup_users
+#    cleanup_posts
+#    cleanup_topics
+#    cleanup_categories
+    cleanup_users
     puts "Cleanup complete!"
   end
 end
diff --git a/goss-delalluser-20240527.rb b/goss-delalluser-20240527.rb
index e987c96..2e8d999 100644
--- a/goss-delalluser-20240527.rb
+++ b/goss-delalluser-20240527.rb
@@ -1,8 +1,8 @@
 # Load the Discourse environment
-require File.expand_path("../../../config/environment", __FILE__)
+require File.expand_path("../../../../config/environment", __FILE__)
 
 # Define usernames to exclude from deletion
-excluded_usernames = ["saint", "discobot", "system"]
+excluded_usernames = ["saint", "discobot", "system","admin"]
 
 # Find all users except the excluded ones
 users_to_delete = User.where.not(username: excluded_usernames)
diff --git a/gosss.rb b/gosss.rb
new file mode 100644
index 0000000..940c431
--- /dev/null
+++ b/gosss.rb
@@ -0,0 +1,600 @@
+# gossamer threads migration-import code
+# v0.12
+
+require 'mysql2'
+require 'open-uri'
+require 'net/http'
+require 'tempfile'
+require 'sqlite3'
+
+require 'digest'
+require 'fileutils'
+require 'csv'
+require 'time'
+
+require File.expand_path("../../../config/environment", __FILE__)
+require_relative 'base'
+
+# Migrates users, categories, topics/posts and personal messages from a
+# Gossamer Forums MySQL database into Discourse. Old-to-new ID, username and
+# URL mappings are kept in a local SQLite file (id_name_url_map.db).
+class GossamerForumsImporter < ImportScripts::Base
+  # Usernames are cut to this many characters.
+  MAX_USERNAME_LENGTH = 20
+  # "About Me" text is cut to this many characters.
+  MAX_BIO_LENGTH = 3000
+
+  # Connects to the Gossamer MySQL database and opens the SQLite mapping DB.
+  # Settings come from GOSSAMER_DB_* environment variables so the password is
+  # not stored in the script; exits with status 1 when the password is missing
+  # or the connection fails.
+  def initialize
+    super
+    begin
+      @mysql_client = Mysql2::Client.new(
+        host: ENV.fetch("GOSSAMER_DB_HOST", "slot.northend.network"),
+        username: ENV.fetch("GOSSAMER_DB_USER", "admin"),
+        password: ENV.fetch("GOSSAMER_DB_PASSWORD"),
+        database: ENV.fetch("GOSSAMER_DB_NAME", "slot")
+      )
+    rescue KeyError => e
+      puts "Missing MySQL configuration: #{e.message}"
+      exit 1
+    rescue Mysql2::Error => e
+      puts "Error connecting to MySQL: #{e.message}"
+      exit 1
+    end
+
+    initialize_sqlite_id_name_url_db
+  end
+
+  # Creates the SQLite mapping tables if they do not exist yet.
+  def initialize_sqlite_id_name_url_db
+    @db = SQLite3::Database.new 'id_name_url_map.db'
+    @db.execute <<-SQL
+      CREATE TABLE IF NOT EXISTS user_id_map (
+        old_user_id INTEGER PRIMARY KEY,
+        new_user_id INTEGER
+      );
+    SQL
+    @db.execute <<-SQL
+      CREATE TABLE IF NOT EXISTS category_id_map (
+        old_category_id INTEGER PRIMARY KEY,
+        new_category_id INTEGER
+      );
+    SQL
+    @db.execute <<-SQL
+      CREATE TABLE IF NOT EXISTS username_map (
+        id INTEGER PRIMARY KEY,
+        old_username TEXT,
+        new_username TEXT,
+        email TEXT,
+        real_name TEXT
+      );
+    SQL
+    # Notes about url_map stay outside the heredoc: its content is sent to
+    # SQLite verbatim, and SQLite does not accept "#" comments.
+    # Possible future column: created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+    @db.execute <<-SQL
+      CREATE TABLE IF NOT EXISTS url_map (
+        old_post_id INTEGER PRIMARY KEY,
+        new_url TEXT,
+        title TEXT
+      );
+    SQL
+  end
+
+  # Records old Gossamer user ID -> new Discourse user ID.
+  def insert_user_id_mapping(old_user_id, new_user_id)
+    @db.execute "INSERT OR REPLACE INTO user_id_map (old_user_id, new_user_id) VALUES (?, ?)", [old_user_id, new_user_id]
+  end
+
+  # Returns the Discourse user ID for a Gossamer user ID, or nil if unmapped.
+  def fetch_user_id_mapping(old_user_id)
+    @db.get_first_value "SELECT new_user_id FROM user_id_map WHERE old_user_id = ?", old_user_id
+  end
+
+  # Records old Gossamer forum ID -> new Discourse category ID.
+  def insert_category_id_mapping(old_category_id, new_category_id)
+    @db.execute "INSERT OR REPLACE INTO category_id_map (old_category_id, new_category_id) VALUES (?, ?)", [old_category_id, new_category_id]
+  end
+
+  # Returns the Discourse category ID for a Gossamer forum ID, or nil if unmapped.
+  def fetch_category_id_mapping(old_category_id)
+    @db.get_first_value "SELECT new_category_id FROM category_id_map WHERE old_category_id = ?", old_category_id
+  end
+
+  # Records a username that had to change on import.
+  def insert_username_mapping(old_username, new_username, email, real_name)
+    @db.execute "INSERT INTO username_map (old_username, new_username, email, real_name) VALUES (?, ?, ?, ?)", [old_username, new_username, email, real_name]
+  end
+
+  # Define a method to export the username mapping table to a CSV file
+  def export_username_mapping_to_csv(filename)
+    CSV.open(filename, 'w') do |csv|
+      # Add headers
+      csv << ['Old Username', 'New Username', 'Email', 'Full Name']
+
+      # Fetch data from the database
+      @db.execute("SELECT old_username, new_username, email, real_name FROM username_map") do |row|
+        csv << row
+      end
+    end
+    puts "Exported changed username mappings to #{filename}"
+  end
+
+  # Insert a URL mapping into the SQLite database.
+  # OR REPLACE keeps a re-run from failing on the old_post_id primary key.
+  def insert_url_mapping(old_post_id, new_url, title)
+    @db.execute "INSERT OR REPLACE INTO url_map (old_post_id, new_url, title) VALUES (?, ?, ?)", [old_post_id, new_url, title]
+  end
+
+  # Export the URL mappings to a CSV file
+  def export_url_mapping_to_csv(filename)
+    CSV.open(filename, "w") do |csv|
+      # Add headers
+      csv << ["Old Post ID", "New URL", "Title"]
+      @db.execute("SELECT old_post_id, new_url, title FROM url_map") do |row|
+        csv << row
+      end
+    end
+    puts "Exported URL mappings to #{filename}"
+  end
+
+  # Method to create Nginx rewrite rules file
+  def create_nginx_rewrite_rules(filename)
+    File.open(filename, "w") do |file|
+      @db.execute("SELECT old_post_id, new_url FROM url_map") do |row|
+        old_post_id, new_url = row
+        file.puts "rewrite ^/forum/.*P#{old_post_id}/$ #{new_url} permanent;"
+      end
+    end
+  end
+
+  # Execute an SQL query on the Gossamer Forums database
+  def execute_query(query)
+    @mysql_client.query(query, as: :hash)
+  end
+
+  # Sanitize the username to meet Discourse's requirements.
+  # Returns the username to use; when it differs from the Gossamer username
+  # the pair is recorded in username_map.
+  def sanitize_username(original_username, email, real_name)
+    sanitized_username = original_username.to_s.gsub(/[^a-zA-Z0-9._-]/, '_')
+    sanitized_username = "#{sanitized_username}." if sanitized_username.length < 2 # Allow two-character usernames
+    sanitized_username = sanitized_username[0, MAX_USERNAME_LENGTH]
+    firststep_sanitized = sanitized_username
+
+    existing_user = User.find_by(username: sanitized_username)
+
+    if existing_user
+      # Same e-mail and name: treat it as the same person and reuse the username.
+      same_email = existing_user.email.to_s.downcase == email.to_s.downcase
+      return sanitized_username if same_email && existing_user.name == real_name
+
+      counter = 1
+      while User.exists?(username: sanitized_username)
+        suffix = "_#{counter}"
+        # Shorten the base rather than the result, otherwise a 20-character
+        # base would lose its suffix and this loop would never terminate.
+        sanitized_username = "#{firststep_sanitized[0, MAX_USERNAME_LENGTH - suffix.length]}#{suffix}"
+        counter += 1
+      end
+    end
+
+    if original_username != sanitized_username
+      # The Discourse username is not the same as the Gossamer Forums username
+      puts "Sanitized username: '#{original_username}' --> '#{sanitized_username}'"
+      insert_username_mapping(original_username, sanitized_username, email, real_name)
+    end
+
+    sanitized_username
+  end
+
+  # Sanitize email to replace restricted domains
+  def sanitize_email(email)
+    restricted_domains = ['mailinator.com', 'example.com'] # Add more restricted domains as needed
+    return email unless email.to_s.include?('@')
+
+    local_part, _, domain = email.rpartition('@')
+    return email unless restricted_domains.include?(domain.downcase)
+
+    # Rebuild the address instead of gsub-ing, so a local part that happens to
+    # contain the domain text is left alone.
+    sanitized_email = "#{local_part}@example.org" # Change to a permissible domain
+    puts "Sanitized email: '#{email}' --> '#{sanitized_email}'"
+    sanitized_email
+  end
+
+  # Helper method to download an image from a URL.
+  # Returns the response body, or nil when the download fails.
+  def download_image(url)
+    # Block form closes the underlying IO as soon as it has been read.
+    URI.open(url, &:read)
+  rescue OpenURI::HTTPError => e
+    puts "Failed to download image from #{url}: #{e.message}"
+    nil
+  rescue URI::InvalidURIError => e
+    puts "Failed to handle invalid URL/URI for #{url}: #{e.message}"
+    nil
+  rescue SocketError, SystemCallError, Timeout::Error => e
+    # DNS failures, refused connections and timeouts should skip one image,
+    # not abort the whole import.
+    puts "Failed to download image from #{url}: #{e.message}"
+    nil
+  end
+
+  # Creates an Upload record for the file that points at the Gossamer URL.
+  # Returns the Upload, or nil when it could not be created.
+  def upload_image(user, file, filename, gossamer_url)
+    Upload.create!(
+      user_id: user.id,
+      original_filename: filename,
+      filesize: file.size,
+      url: gossamer_url
+    )
+  rescue => e
+    puts "Failed to upload image #{filename} for user #{user.username}: #{e.message}"
+    nil
+  end
+
+  # Import users from Gossamer Forums to Discourse
+  def import_users
+    puts "Importing users..."
+    users = []
+
+    # Fetch all users from Gossamer Forums
+    execute_query("SELECT * FROM gforum_User").each do |row|
+      users << {
+        id: row['user_id'],
+        username: sanitize_username(row['user_username'], row['user_email'], row['user_real_name']),
+        email: row['user_email'],
+        created_at: Time.at(row['user_registered']),
+        updated_at: Time.at(row['user_last_seen']),
+        name: row['user_real_name'],
+        title: row['user_title'],
+        bio_raw: row['user_about'] || "",
+        website: row['user_homepage'],
+        location: row['user_location'],
+        custom_fields: {
+          md5_password: row['user_password'],
+          original_username: row['user_username'],
+          original_gossamer_id: row['user_id']
+        }
+      }
+    end
+
+    # Create or update users in Discourse. A copy is handed over so that this
+    # list keeps every key even if the base importer consumes the hash it gets.
+    create_users(users) { |user| user.dup }
+
+    # For each user, add user ID mapping to SQLite now that we know what the Discourse user ID is, ... and append user bio and import user files
+    users.each do |user|
+      discourse_user = find_user_by_import_id(user[:id]) || User.find_by(username: user[:username])
+      if discourse_user.nil?
+        puts "User #{user[:username]} (Gossamer ID #{user[:id]}) not found in Discourse. Skipping."
+        next
+      end
+
+      puts "for insert_user_id_mapping: user[:id] #{user[:id]} discourse_user.id #{discourse_user.id}"
+      insert_user_id_mapping(user[:id], discourse_user.id)
+
+      # Ensure user profile exists and bio_raw is a string
+      discourse_user.user_profile ||= UserProfile.new(user_id: discourse_user.id)
+      profile = discourse_user.user_profile
+      bio = profile.bio_raw.to_s
+      imported_bio = user[:bio_raw].to_s
+
+      # Append the Gossamer bio unless it is already there (keeps re-runs from
+      # stacking the same text).
+      unless imported_bio.empty? || bio.include?(imported_bio)
+        bio = bio.empty? ? imported_bio : "#{bio}\n\n#{imported_bio}"
+      end
+
+      # Ensure the bio does not exceed the limit
+      if bio.length > MAX_BIO_LENGTH
+        puts "Warning: About Me for user #{discourse_user.username} (ID: #{discourse_user.id}) exceeds 3000 characters. Truncating."
+        bio = bio[0, MAX_BIO_LENGTH]
+      end
+      profile.bio_raw = bio
+      profile.save!
+
+      # Import user files
+      import_user_files(discourse_user)
+    end
+  end
+
+  # Import user files (profile images) from Gossamer Forums to Discourse
+  def import_user_files(user)
+    print "\rImporting files for user #{user.username}..."
+
+    original_gossamer_id = user.custom_fields['original_gossamer_id'].to_s
+    # Digits only: the value is interpolated into the SQL below.
+    unless original_gossamer_id.match?(/\A\d+\z/)
+      puts "User #{user.username} does not have a valid original_gossamer_id. Skipping file import."
+      return
+    end
+
+    puts "Original Gossamer ID for user #{user.username}: #{original_gossamer_id}"
+
+    images_imported = 0
+
+    execute_query("SELECT * FROM gforum_User_Files WHERE ForeignColKey = #{original_gossamer_id}").each do |file|
+      file_url = "https://forum.slot.com/images/users/images/#{file['ID'] % 10}/#{file['ID']}-#{file['File_Name']}"
+      puts "User #{user.username} User ID: #{user.id} original_gossamer_id: #{original_gossamer_id} file_url: #{file_url}"
+
+      next unless file['ForeignColName'] =~ /^user_image\d+$/
+      puts "#A"
+      next unless ['image/jpeg', 'image/png'].include?(file['File_MimeType'])
+      puts "#B"
+
+      image_data = download_image(file_url)
+      next if image_data.nil?
+      puts "#C"
+
+      temp_file = Tempfile.new(['user_image', File.extname(file['File_Name'])])
+      begin
+        temp_file.binmode
+        temp_file.write(image_data)
+        temp_file.rewind
+
+        if images_imported == 0
+          puts "#D"
+          upload = upload_image(user, temp_file, file['File_Name'], file_url)
+          next if upload.nil?
+
+          user.user_avatar = UserAvatar.create!(user_id: user.id, custom_upload_id: upload.id)
+          user.save!
+
+          # Set the Profile Header and the User Card Background
+          UserProfile.find_by(user_id: user.id).update!(
+            profile_background_upload_id: upload.id,
+            card_background_upload_id: upload.id
+          )
+
+          images_imported += 1
+        end
+        puts "#E"
+        user.user_profile.bio_raw ||= ""
+        user.user_profile.bio_raw += "\n\n![#{file['File_Name']}](#{file_url})"
+        user.user_profile.save!
+      ensure
+        # Runs on the early "next" above too, so the temp file never leaks.
+        temp_file.close
+        temp_file.unlink
+      end
+    end
+    print "Importing files for user #{user.username}... Done.\n"
+  end
+
+  # Import categories from Gossamer Forums to Discourse
+  def import_categories
+    puts "Importing categories (forums)..."
+    execute_query("SELECT * FROM gforum_Forum").each do |row|
+      # Only create category if it does not exist
+      next if CategoryCustomField.exists?(name: 'original_gossamer_id', value: row['forum_id'])
+
+      category_name = row['forum_name']
+      category_description = row['forum_desc'] || "No description provided"
+      puts "id #{row['forum_id']} name #{category_name} description #{category_description}"
+      # Create category in Discourse
+      category = create_category(
+        {
+          name: category_name,
+          description: category_description,
+          created_at: row['forum_last'] ? Time.at(row['forum_last']) : Time.now,
+          updated_at: row['forum_last'] ? Time.at(row['forum_last']) : Time.now
+        },
+        row['forum_id'] # import_id argument
+      )
+
+      category.custom_fields['original_gossamer_id'] = row['forum_id']
+      category.save!
+
+      # Store the category ID mapping
+      puts "for insert_category_id_mapping: category[:id] #{category[:id]} row['forum_id'] #{row['forum_id']}"
+      insert_category_id_mapping(row['forum_id'], category[:id])
+    end
+    puts "Importing categories... Done."
+  end
+
+  # Helper function to ensure title meets the minimum length requirement.
+  # A nil title is treated as empty.
+  def ensure_valid_title(title, min_length = 5)
+    title = title.to_s
+    if title.length < min_length
+      title += "." * (min_length - title.length) # Append dots to make it longer
+    end
+    title
+  end
+
+  # Import topics and posts from Gossamer Forums to Discourse
+  def import_topics_and_posts
+    puts "Importing topics and posts..."
+
+    # Execute the query to get all posts ordered by post_id
+    execute_query("SELECT * FROM gforum_Post ORDER BY post_id").each do |row|
+      puts "post_id #{row['post_id']} post_root_id #{row['post_root_id']} post_subject/title #{row['post_subject']} forum_id_fk/category_id #{row['forum_id_fk']}"
+      discourse_user_id = fetch_user_id_mapping(row['user_id_fk'])
+      discourse_category_id = fetch_category_id_mapping(row['forum_id_fk'])
+
+      puts "discourse_user_id #{discourse_user_id} discourse_category_id #{discourse_category_id}"
+      next unless discourse_user_id && discourse_category_id
+
+      if row['post_root_id'] == 0
+        puts "#1"
+        # Ensure the title is valid
+        title = ensure_valid_title(row['post_subject'])
+
+        # Skip if the topic already exists
+        unless TopicCustomField.exists?(name: 'original_gossamer_id', value: row['post_id'])
+          # Create the topic
+          begin
+            puts "#2"
+            puts "CREATE TOPIC title #{title} discourse_user_id #{discourse_user_id} category_id #{discourse_category_id}"
+            topic = Topic.create!(
+              title: title,
+              user_id: discourse_user_id,
+              created_at: Time.at(row['post_time']),
+              updated_at: Time.at(row['post_latest_reply']),
+              category_id: discourse_category_id
+            )
+            topic.custom_fields['original_gossamer_id'] = row['post_id']
+            topic.save!
+
+            # Create the initial post in the topic
+            puts "CREATE POST topic.id #{topic.id} discourse_user_id #{discourse_user_id}"
+            sanitized_post_message = row['post_message']&.tr("\0", '') || ""
+            post = Post.create!(
+              topic_id: topic.id,
+              user_id: discourse_user_id,
+              raw: sanitized_post_message,
+              created_at: Time.at(row['post_time']),
+              updated_at: Time.at(row['post_latest_reply'])
+            )
+            post.custom_fields['original_gossamer_id'] = row['post_id']
+            post.save!
+
+            # Create URL mappings
+            new_url = "https://new/t/#{topic.slug}/#{topic.id}"
+            insert_url_mapping(row['post_id'], new_url, title)
+          rescue ActiveRecord::RecordInvalid => e
+            puts "Error importing topic with post_id #{row['post_id']}: #{e.message}"
+          end
+        end
+      else
+        puts "#3"
+        # Skip replies that an earlier run already imported
+        next if PostCustomField.exists?(name: 'original_gossamer_id', value: row['post_id'])
+
+        # Find the root topic for the post
+        root_topic_field = TopicCustomField.find_by(name: 'original_gossamer_id', value: row['post_root_id'])
+
+        if root_topic_field
+          topic_id = root_topic_field.topic_id
+
+          # Find the parent post for the reply
+          parent_post_field = PostCustomField.find_by(name: 'original_gossamer_id', value: row['post_father_id'])
+          reply_to_post_number = parent_post_field ? Post.find(parent_post_field.post_id).post_number : nil
+
+          # Create the post in the existing topic
+          begin
+            puts "#4"
+            sanitized_post_message = row['post_message']&.tr("\0", '') || ""
+            post = Post.create!(
+              topic_id: topic_id,
+              user_id: discourse_user_id,
+              raw: sanitized_post_message,
+              created_at: Time.at(row['post_time']),
+              updated_at: Time.at(row['post_latest_reply']),
+              reply_to_post_number: reply_to_post_number
+            )
+            post.custom_fields['original_gossamer_id'] = row['post_id']
+            post.save!
+          rescue ActiveRecord::RecordInvalid => e
+            puts "Error importing post with post_id #{row['post_id']}: #{e.message}"
+          end
+        else
+          puts "Warning: Root topic not found for post_id #{row['post_id']} with post_root_id #{row['post_root_id']}"
+        end
+      end
+    end
+  end
+
+  # Import personal messages from gforum_Message table (both inbox and sent messages)
+  def import_personal_messages
+    puts "Importing personal (inbox and sendmail) messages..."
+    execute_query("SELECT * FROM gforum_Message").each do |row|
+      from_user_id = fetch_user_id_mapping(row['from_user_id_fk'])
+      to_user_id = fetch_user_id_mapping(row['to_user_id_fk'])
+
+      next unless from_user_id && to_user_id
+
+      # Skip if the message already exists
+      next if TopicCustomField.exists?(name: 'original_gossamer_msg_id', value: row['msg_id'])
+
+      # Sanitize the message, ensuring we have an empty string or the content without any \0
+      sanitized_message = row['msg_body']&.tr("\0", '') || ""
+
+      # Set default message body if the sanitized message is blank
+      sanitized_message = " " if sanitized_message.strip.empty?
+
+      # Use the stripped subject; fall back to a placeholder so the title is never blank
+      title = row['msg_subject'].to_s.strip
+      title = "(no subject)" if title.empty?
+
+      puts "IMPORTING title #{title} user_id #{from_user_id} to_user_id #{to_user_id}"
+
+      begin
+        # Create a private message topic in Discourse
+        topic = Topic.create!(
+          title: title,
+          user_id: from_user_id,
+          archetype: Archetype.private_message,
+          created_at: Time.at(row['msg_time']),
+          updated_at: Time.at(row['msg_time'])
+        )
+        topic.custom_fields['original_gossamer_msg_id'] = row['msg_id']
+        topic.save!
+
+        # Create the message as a post in the private topic
+        post = Post.create!(
+          topic_id: topic.id,
+          user_id: from_user_id,
+          raw: sanitized_message,
+          created_at: Time.at(row['msg_time']),
+          updated_at: Time.at(row['msg_time'])
+        )
+        post.custom_fields['original_gossamer_msg_id'] = row['msg_id']
+        post.save!
+
+        # Add recipient user to the private message topic
+        topic.topic_allowed_users.create!(user_id: to_user_id)
+      rescue ActiveRecord::RecordInvalid => e
+        # One invalid message is reported and skipped, as for topics and posts.
+        puts "Error importing message with msg_id #{row['msg_id']}: #{e.message}"
+      end
+    end
+  end
+
+  # Import attachments for a post.
+  # Returns post_message with one Markdown image link appended per attachment.
+  def import_post_attachments(post_message, post_id)
+    post_message = post_message.to_s
+    # Fetch attachments related to the post; Integer() keeps the interpolated id numeric
+    attachments = execute_query("SELECT * FROM gforum_PostAttachment WHERE post_id_fk = #{Integer(post_id)}")
+    attachments.each do |attachment|
+      # Append attachment links to the post message
+      file_url = "https://forum.slot.com/images/posts/attachments/#{attachment['ID'] % 10}/#{attachment['ID']}-#{attachment['File_Name']}"
+      post_message += "\n\n![#{attachment['File_Name']}](#{file_url})"
+    end
+    post_message
+  end
+
+  # Main method to perform the import
+  def perform_import
+    # Secret trick to disable RateLimiting protection in Discourse
+    RateLimiter.disable
+
+    # Set our unique timestamp for this migration run
+    timestamp = Time.now.strftime("-%y%m%d%H%M%S")
+
+    puts "Starting Gossamer Forums import... #{timestamp}"
+
+    import_users
+    export_username_mapping_to_csv("gossamer-migration-username-mapping#{timestamp}")
+
+    import_categories
+    import_topics_and_posts
+    export_url_mapping_to_csv("gossamer-migration-url-mapping#{timestamp}")
+    create_nginx_rewrite_rules("gossamer-redirects.conf")
+
+    import_personal_messages
+
+    # import_attachments
+
+    puts "Gossamer Forums import complete! #{timestamp}"
+  end
+end
+
+GossamerForumsImporter.new.perform_import