# gossamer threads migration-import code
# v0.15.5

require 'mysql2'
require 'open-uri'
require 'net/http'
require 'tempfile'
require 'sqlite3'
require 'digest'
require 'fileutils'
require 'csv'
require 'time'

require File.expand_path("../../../../config/environment", __FILE__)
require_relative '../base'

# Imports users, categories, topics/posts (with attachments) and personal
# messages from a Gossamer Forums MySQL database into Discourse.
#
# Old-to-new id mappings, changed usernames and old-post-to-new-URL mappings are
# persisted in a local SQLite file ('id_name_url_map.db') so that they can be
# exported as CSV files and as Nginx rewrite rules.
class GossamerForumsImporter < ImportScripts::Base
  # Connects to the Gossamer Forums MySQL database and opens the SQLite mapping
  # database. Exits the process with status 1 if the MySQL connection fails.
  #
  # Connection settings can be overridden with the GOSSAMER_MYSQL_HOST,
  # GOSSAMER_MYSQL_USERNAME, GOSSAMER_MYSQL_PASSWORD and GOSSAMER_MYSQL_DATABASE
  # environment variables; the previous hard-coded values remain the defaults.
  def initialize
    super
    begin
      # Initialize MySQL client to connect to Gossamer Forums database
      # FIXME: credentials should not live in source control; supply them via the
      # environment and remove the fallbacks once deployments have been updated.
      @mysql_client = Mysql2::Client.new(
        host: ENV.fetch("GOSSAMER_MYSQL_HOST", "slowtwitch.northend.network"),
        username: ENV.fetch("GOSSAMER_MYSQL_USERNAME", "admin"),
        password: ENV.fetch("GOSSAMER_MYSQL_PASSWORD", "yxnh93Ybbz2Nm8#mp28zCVv"),
        database: ENV.fetch("GOSSAMER_MYSQL_DATABASE", "slowtwitch")
      )
    rescue Mysql2::Error => e
      puts "Error connecting to MySQL: #{e.message}"
      exit 1
    end

    # # Create a mapping of old Gossamer user IDs to new Discourse user IDs
    # @user_id_map = {}
    initialize_sqlite_id_name_url_db
  end

  # Opens (or creates) the SQLite mapping database and makes sure the four
  # mapping tables exist.
  def initialize_sqlite_id_name_url_db
    @db = SQLite3::Database.new 'id_name_url_map.db'
    @db.execute <<-SQL
      CREATE TABLE IF NOT EXISTS user_id_map (
        old_user_id INTEGER PRIMARY KEY,
        new_user_id INTEGER
      );
    SQL
    @db.execute <<-SQL
      CREATE TABLE IF NOT EXISTS category_id_map (
        old_category_id INTEGER PRIMARY KEY,
        new_category_id INTEGER
      );
    SQL
    @db.execute <<-SQL
      CREATE TABLE IF NOT EXISTS username_map (
        id INTEGER PRIMARY KEY,
        old_username TEXT,
        new_username TEXT,
        email TEXT,
        real_name TEXT
      );
    SQL
    @db.execute <<-SQL
      CREATE TABLE IF NOT EXISTS url_map (
        old_post_id INTEGER PRIMARY KEY,
        new_url TEXT,
        title TEXT
      );
    SQL
  end

  # Records (or overwrites) the Discourse user id for a Gossamer user id.
  def insert_user_id_mapping(old_user_id, new_user_id)
    @db.execute "INSERT OR REPLACE INTO user_id_map (old_user_id, new_user_id) VALUES (?, ?)",
                [old_user_id, new_user_id]
  end

  # Returns the Discourse user id stored for a Gossamer user id, or nil.
  def fetch_user_id_mapping(old_user_id)
    @db.get_first_value "SELECT new_user_id FROM user_id_map WHERE old_user_id = ?", old_user_id
  end

  # Records (or overwrites) the Discourse category id for a Gossamer forum id.
  def insert_category_id_mapping(old_category_id, new_category_id)
    @db.execute "INSERT OR REPLACE INTO category_id_map (old_category_id, new_category_id) VALUES (?, ?)",
                [old_category_id, new_category_id]
  end

  # Returns the Discourse category id stored for a Gossamer forum id, or nil.
  def fetch_category_id_mapping(old_category_id)
    @db.get_first_value "SELECT new_category_id FROM category_id_map WHERE old_category_id = ?", old_category_id
  end

  # Records that a Gossamer username had to be changed for Discourse.
  def insert_username_mapping(old_username, new_username, email, real_name)
    @db.execute "INSERT INTO username_map (old_username, new_username, email, real_name) VALUES (?, ?, ?, ?)",
                [old_username, new_username, email, real_name]
  end

  # Define a method to export the username mapping table to a CSV file
  def export_username_mapping_to_csv(filename)
    CSV.open(filename, 'w') do |csv|
      # Add headers
      csv << ['Old Username', 'New Username', 'Email', 'Full Name']
      # Fetch data from the database
      @db.execute("SELECT old_username, new_username, email, real_name FROM username_map") do |row|
        csv << row
      end
    end
    puts "Exported changed username mappings to #{filename}"
  end

  # Insert a URL mapping into the SQLite database.
  # Uses INSERT OR REPLACE, like the other id mappings, so that re-running the
  # import against an existing mapping file does not raise on the primary key.
  def insert_url_mapping(old_post_id, new_url, title)
    @db.execute "INSERT OR REPLACE INTO url_map (old_post_id, new_url, title) VALUES (?, ?, ?)",
                [old_post_id, new_url, title]
  end

  # Export the URL mappings to a CSV file
  def export_url_mapping_to_csv(filename)
    CSV.open(filename, "w") do |csv|
      # Add headers
      csv << ["Old Post ID", "New URL", "Title"]
      @db.execute("SELECT old_post_id, new_url, title FROM url_map") do |row|
        csv << row
      end
    end
    puts "Exported URL mappings to #{filename}"
  end

  # Method to create Nginx rewrite rules file (one permanent rewrite per mapped post)
  def create_nginx_rewrite_rules(filename)
    File.open(filename, "w") do |file|
      @db.execute("SELECT old_post_id, new_url FROM url_map") do |row|
        old_post_id, new_url = row
        file.puts "rewrite ^/forum/.*P#{old_post_id}/$ #{new_url} permanent;"
      end
    end
  end

  # Execute an SQL query on the Gossamer Forums database; rows are returned as hashes
  def execute_query(query)
    @mysql_client.query(query, as: :hash)
  end

  # Sanitize the username to meet Discourse's requirements.
  #
  # Characters outside [a-zA-Z0-9._-] become '_', names shorter than two
  # characters get a trailing '.', and the result is capped at 20 characters.
  # If the resulting name is already taken by a different person (different
  # email or real name), a numeric "_N" suffix is appended until the name is free.
  # Any change from the original name is recorded in the username_map table.
  #
  # Returns the username to use in Discourse.
  def sanitize_username(original_username, email, real_name)
    sanitized_username = original_username.gsub(/[^a-zA-Z0-9._-]/, '_')
    sanitized_username = "#{sanitized_username}." if sanitized_username.length < 2 # Allow two-character usernames
    sanitized_username = sanitized_username[0, 20] if sanitized_username.length > 20
    firststep_sanitized = sanitized_username

    existing_user = User.find_by(username: sanitized_username)
    if existing_user
      # to_s guards against a missing email on either side
      if existing_user.email.to_s.downcase == email.to_s.downcase && existing_user.name == real_name
        # The existing user with the username the same as the current proposed sanitised name _is_ the same person...
        return sanitized_username
      end

      # We cannot clobber another person with the same proposed username, so we resolve the conflict
      counter = 1
      while User.exists?(username: sanitized_username)
        suffix = "_#{counter}"
        # Shorten the base rather than the combined string: truncating the combined
        # string would cut the suffix off a 20-character base and loop forever.
        sanitized_username = "#{firststep_sanitized[0, 20 - suffix.length]}#{suffix}"
        counter += 1
      end
    end

    if original_username != sanitized_username
      # The Discourse username is not the same as the Gossamer Forums username
      puts "Sanitized username: '#{original_username}' --> '#{sanitized_username}'"
      insert_username_mapping(original_username, sanitized_username, email, real_name)
    # else
    #   puts "UNsanitized username: '#{original_username}' --> '#{sanitized_username}'"
    end

    sanitized_username
  end

  # Sanitize email to replace restricted domains.
  # Only the domain part (after the last '@') is replaced; the local part is kept as-is.
  def sanitize_email(email)
    restricted_domains = ['mailinator.com', 'example.com'] # Add more restricted domains as needed
    local_part, separator, domain = email.rpartition('@')
    return email unless restricted_domains.include?(domain)

    sanitized_email = "#{local_part}#{separator}example.org" # Change to a permissible domain
    puts "Sanitized email: '#{email}' --> '#{sanitized_email}'"
    sanitized_email
  end

  # Helper method to download an attachment / image from a URL.
  # Returns the response body, or nil on an HTTP error or an invalid URL.
  def download_attachment(url)
    puts "URL: '#{url}'"
    # Block form so that the underlying handle is closed once the body has been read
    URI.open(url, &:read)
  rescue OpenURI::HTTPError => e
    puts "Failed to download attachment from #{url}: #{e.message}"
    nil
  rescue URI::InvalidURIError => e
    puts "Failed to handle invalid URL/URI for #{url}: #{e.message}"
    nil
  end

  # Helper method to upload an attachment / image to Discourse.
  #
  # user is a Discourse User record (its id and username are read), file is an
  # open file-like object (only its size is read), and gossamer_url is stored as
  # the upload's URL. Returns the Upload record, or nil if creation failed.
  def upload_attachment(user, file, filename, gossamer_url)
    upload = Upload.create!(
      user_id: user.id,
      original_filename: filename,
      filesize: file.size,
      # filesize: File.size(file.path),
      # content_type: `file --brief --mime-type #{file.path}`.strip,
      # sha1: Digest::SHA1.file(file.path).hexdigest,
      # origin: 'user_avatar',
      # retain_hours: nil,
      url: gossamer_url
    )
    # Error -- non-existent method upload.ensure_consistency!
    # (deliberately not called: it would make every upload fail)

    # Move the file to the correct location
    # FileUtils.mv(file.path, upload.path)
    upload.save!

    upload
  rescue => e
    puts "Failed to upload attachment #{filename} for user #{user.username}: #{e.message}"
    nil
  end

  # Helper method to handle post attachments.
  #
  # Downloads every attachment recorded for the Gossamer post, registers it as a
  # Discourse upload owned by the user with id user_id, and appends a link (an
  # image embed for image/* types) to the Discourse post's raw text.
  def handle_post_attachments(gossamer_post_id, post, user_id)
    uploader = nil
    execute_query("SELECT * FROM gforum_PostAttachment WHERE post_id_fk = #{gossamer_post_id.to_i}").each do |att_row|
      attachment_url = "https://forum.slowtwitch.com/forum/?do=post_attachment;postatt_id=#{att_row['postatt_id']}"
      attachment_data = download_attachment(attachment_url)
      next unless attachment_data

      # upload_attachment needs a User record, not an id; look it up only once an
      # attachment has actually been downloaded, to avoid a query for every post.
      uploader ||= User.find_by(id: user_id)
      unless uploader
        puts "User with ID #{user_id} not found in Discourse. Skipping attachments for post_id #{gossamer_post_id}."
        break
      end

      # NOTE(review): postatt_content is treated as the attachment's MIME type -- confirm against the schema
      mime_type = att_row['postatt_content'].to_s

      temp_file = Tempfile.new(['attachment', File.extname(att_row['postatt_filename'])])
      begin
        temp_file.binmode
        temp_file.write(attachment_data)
        temp_file.rewind

        upload = upload_attachment(uploader, temp_file, att_row['postatt_filename'], attachment_url)
        next unless upload

        upload_url = upload.url
        if mime_type.start_with?('image/')
          post.raw += "\n![#{att_row['postatt_filename']}](#{upload_url})"
        else
          post.raw += "\n[#{att_row['postatt_filename']}](#{upload_url})"
        end
        post.save!
      ensure
        # Runs on the early `next` as well, so the temp file never leaks
        temp_file.close
        temp_file.unlink
      end
    end
  end

  # def download_file(url)
  #   require 'open-uri'
  #   begin
  #     file = Tempfile.new
  #     file.binmode
  #     file.write(URI.open(url).read)
  #     file.rewind
  #     file
  #   rescue => e
  #     puts "Failed to download file from #{url}: #{e.message}"
  #     nil
  #   end
  # end

  # Helper method to upload an image to Discourse
  # def upload_image(user, image_data, filename)
  #   return if image_data.nil?
  #
  #   upload = Upload.create_for(user.id, File.open(image_data.path), filename, 'image/jpeg')
  #   if upload.nil? || !upload.persisted?
  #     puts "Failed to upload image for user #{user.username}"
  #     return
  #   end
  #
  #   upload
  # end

  # Import users from Gossamer Forums to Discourse.
  #
  # After the users have been created, each one is looked up again by username
  # to record the old-id/new-id mapping, store the bio and import profile images.
  def import_users
    puts "Importing users..."

    # Fetch all users from Gossamer Forums
    # NOTE(review): usernames are sanitized before any of these users exist in
    # Discourse, so two Gossamer users that sanitize to the same name within one
    # run are not detected here -- confirm how create_users handles that.
    users = execute_query("SELECT * FROM gforum_User").map do |row|
      {
        id: row['user_id'],
        username: sanitize_username(row['user_username'], row['user_email'], row['user_real_name']),
        email: row['user_email'],
        created_at: Time.at(row['user_registered']),
        updated_at: Time.at(row['user_last_seen']),
        name: row['user_real_name'],
        title: row['user_title'],
        bio_raw: row['user_about'] || "",
        website: row['user_homepage'],
        location: row['user_location'],
        custom_fields: {
          md5_password: row['user_password'],
          original_username: row['user_username'],
          original_gossamer_id: row['user_id']
        }
      }
    end

    # Create or update users in Discourse
    create_users(users) do |user|
      # insert_user_id_mapping(user[:id], user.id)
      user
    end

    # For each user, add user ID mapping to SQLite now that we know what the Discourse user ID is,
    # ... and append user bio and import user files
    users.each do |user|
      # discourse_username = sanitize_username(user[:username], user[:email], user[:name])
      discourse_username = user[:username]
      discourse_user = User.find_by(username: discourse_username)

      if discourse_user.nil?
        puts "User #{user[:username]} --> #{discourse_username} not found in Discourse. Skipping file import."
        next
      end

      # # Store the user ID mapping
      # @user_id_map[user[:id]] = discourse_user.id
      puts "for insert_user_id_mapping: user[:id] #{user[:id]} discourse_user.id #{discourse_user.id}"
      insert_user_id_mapping(user[:id], discourse_user.id)

      # Ensure user profile exists and bio_raw is a string
      discourse_user.user_profile ||= UserProfile.new(user_id: discourse_user.id)
      profile = discourse_user.user_profile
      profile.bio_raw ||= ""

      # Append bio if it exists, otherwise set it to empty string to avoid nil errors
      # NOTE(review): if create_users already stored bio_raw, this appends it a second time -- verify
      if profile.bio_raw.empty?
        profile.bio_raw = user[:bio_raw]
      else
        profile.bio_raw += "\n\n" + user[:bio_raw]
      end

      # Ensure the bio does not exceed 3000 characters
      if profile.bio_raw.length > 3000
        puts "Warning: About Me for user #{discourse_user.username} (ID: #{discourse_user.id}) exceeds 3000 characters. Truncating."
        profile.bio_raw = profile.bio_raw[0, 3000]
      end
      profile.save!

      # Import user files
      import_user_files(discourse_user)
    end
  end

  # # Import user files from Gossamer Forums to Discourse
  # def import_user_files(user)
  #   print "\rImporting files for user #{user.username}..."
  #
  #   original_gossamer_id = user.custom_fields['original_gossamer_id']
  #   if original_gossamer_id.nil? || original_gossamer_id.empty?
  #     puts "User #{user.username} does not have a valid original_gossamer_id. Skipping file import."
  #     return
  #   end
  #
  #   # puts "Original Gossamer ID for user #{user.username}: #{original_gossamer_id}"
  #
  #   # Fetch and import user files
  #   execute_query("SELECT * FROM gforum_User_Files WHERE ForeignColKey = #{original_gossamer_id}").each do |file|
  #     # Construct the file URL
  #     file_url = "https://forum.slowtwitch.com/images/users/images/#{file['ID'] % 10}/#{file['ID']}-#{file['File_Name']}"
  #     puts "User #{user.username} User ID: #{user.id} original_gossamer_id: #{original_gossamer_id} file_url: #{file_url}"
  #
  #     new_bio = user.user_profile.bio_raw + "\n\n![#{file['File_Name']}](#{file_url})"
  #     if new_bio.length > 3000
  #       puts "Warning: About Me for user #{user.username} (ID: #{user.id}) exceeds 3000 characters after adding file link. Truncating."
  #       new_bio = new_bio[0, 3000]
  #     end
  #     user.user_profile.bio_raw = new_bio
  #     user.user_profile.save!
  #   end
  #   print "Importing files for user #{user.username}... Done.\n"
  # end

  # Import user files (profile images) from Gossamer Forums to Discourse.
  #
  # Only JPEG/PNG files whose ForeignColName looks like "user_imageN" are used.
  # The first successfully uploaded image becomes the avatar, profile header and
  # user card background; every downloaded image is also linked from the bio.
  def import_user_files(user)
    print "\rImporting files for user #{user.username}..."

    original_gossamer_id = user.custom_fields['original_gossamer_id']
    # to_s so that a non-string custom field value does not break the emptiness check
    if original_gossamer_id.nil? || original_gossamer_id.to_s.empty?
      puts "User #{user.username} does not have a valid original_gossamer_id. Skipping file import."
      return
    end

    puts "Original Gossamer ID for user #{user.username}: #{original_gossamer_id}"

    images_imported = 0

    execute_query("SELECT * FROM gforum_User_Files WHERE ForeignColKey = #{original_gossamer_id}").each do |file|
      file_url = "https://forum.slowtwitch.com/images/users/images/#{file['ID'] % 10}/#{file['ID']}-#{file['File_Name']}"
      puts "User #{user.username} User ID: #{user.id} original_gossamer_id: #{original_gossamer_id} file_url: #{file_url}"

      next unless file['ForeignColName'].to_s.match?(/\Auser_image\d+\z/)
      puts "#A"
      next unless ['image/jpeg', 'image/png'].include?(file['File_MimeType'])
      puts "#B"

      image_data = download_attachment(file_url)
      next if image_data.nil?
      puts "#C"

      temp_file = Tempfile.new(['user_image', File.extname(file['File_Name'])])
      begin
        temp_file.binmode
        temp_file.write(image_data)
        temp_file.rewind

        if images_imported == 0
          puts "#D"
          upload = upload_attachment(user, temp_file, file['File_Name'], file_url)
          next if upload.nil?

          user.user_avatar = UserAvatar.create!(user_id: user.id, custom_upload_id: upload.id)
          user.save!

          # Set the Profile Header
          UserProfile.find_by(user_id: user.id).update!(profile_background_upload_id: upload.id)

          # Set the User Card Background
          UserProfile.find_by(user_id: user.id).update!(card_background_upload_id: upload.id)

          images_imported += 1
        end
        puts "#E"

        user.user_profile.bio_raw ||= ""
        user.user_profile.bio_raw += "\n\n![#{file['File_Name']}](#{file_url})"
        user.user_profile.save!
      ensure
        # Runs on the early `next` as well, so the temp file never leaks
        temp_file.close
        temp_file.unlink
      end
    end
    print "Importing files for user #{user.username}... Done.\n"
  end

  # Import categories from Gossamer Forums to Discourse.
  # A forum is skipped when a category already carries its id in the
  # 'original_gossamer_id' custom field.
  def import_categories
    puts "Importing categories (forums)..."
    execute_query("SELECT * FROM gforum_Forum").each do |row|
      # Only create category if it does not exist
      next if CategoryCustomField.exists?(name: 'original_gossamer_id', value: row['forum_id'])

      category_name = row['forum_name']
      category_description = row['forum_desc'] || "No description provided"
      puts "id #{row['forum_id']} name #{category_name} description #{category_description}"

      # Create category in Discourse
      category = create_category(
        {
          # id: row['forum_id'] + 10,
          name: category_name,
          description: category_description,
          created_at: row['forum_last'] ? Time.at(row['forum_last']) : Time.now,
          updated_at: row['forum_last'] ? Time.at(row['forum_last']) : Time.now
        },
        row['forum_id'] # import_id argument
      )

      # # Map Gossamer forum ID to Discourse category ID for future reference
      # @forum_id_map[row['forum_id']] = category.id

      # category.custom_fields.create!(name: 'original_gossamer_id', value: row['forum_id'])
      category.custom_fields['original_gossamer_id'] = row['forum_id']
      category.save!

      # Store the category ID mapping
      puts "for insert_category_id_mapping: category[:id] #{category[:id]} row['forum_id'] #{row['forum_id']}"
      insert_category_id_mapping(row['forum_id'], category[:id])
    end
    puts "Importing categories... Done."
  end

  # Helper function to ensure title meets the minimum length requirement.
  # Short titles are padded on the right with dots; a nil title is treated as empty.
  def ensure_valid_title(title, min_length = 5)
    title.to_s.ljust(min_length, ".")
  end

  # Import topics and posts from Gossamer Forums to Discourse.
  #
  # Posts are processed in post_id order. A post whose post_root_id is 0 starts a
  # new topic; any other post becomes a reply in the topic created for its root
  # post. Posts whose author or forum has no mapping are skipped.
  def import_topics_and_posts_with_attachments
    puts "Importing topics and posts with attachments..."

    # Execute the query to get all posts ordered by post_id
    execute_query("SELECT * FROM gforum_Post ORDER BY post_id").each do |row|
      puts "post_id #{row['post_id']} post_root_id #{row['post_root_id']} post_subject/title #{row['post_subject']} forum_id_fk/category_id #{row['forum_id_fk']}"

      # discourse_user_id = @user_id_map[row['user_id_fk']]
      discourse_user_id = fetch_user_id_mapping(row['user_id_fk'])
      discourse_category_id = fetch_category_id_mapping(row['forum_id_fk'])
      puts "discourse_user_id #{discourse_user_id} discourse_category_id #{discourse_category_id}"
      next unless discourse_user_id && discourse_category_id

      if row['post_root_id'] == 0
        puts "#1"
        # Ensure the title is valid
        title = ensure_valid_title(row['post_subject'])

        # Skip if the topic already exists
        next if TopicCustomField.exists?(name: 'original_gossamer_id', value: row['post_id'])

        # Create the topic
        begin
          puts "#2"
          puts "CREATE TOPIC title #{title} discourse_user_id #{discourse_user_id} category_id #{discourse_category_id}"
          topic = Topic.create!(
            title: title,
            user_id: discourse_user_id,
            created_at: Time.at(row['post_time']),
            updated_at: Time.at(row['post_latest_reply']),
            category_id: discourse_category_id
          )
          topic.custom_fields['original_gossamer_id'] = row['post_id']
          topic.save!

          # Create the initial post in the topic
          puts "CREATE POST topic.id #{topic.id} discourse_user_id #{discourse_user_id}"

          post = Post.create!(
            topic_id: topic.id,
            user_id: discourse_user_id,
            # raw: import_attachments(row['post_message'], row['post_id']),
            # raw: row['post_message'] || "",
            raw: sanitize_post_message(row['post_message']),
            created_at: Time.at(row['post_time']),
            updated_at: Time.at(row['post_latest_reply'])
          )
          post.custom_fields['original_gossamer_id'] = row['post_id']
          post.save!

          # Handle attachments for the post
          handle_post_attachments(row['post_id'], post, discourse_user_id)

          # Create URL mappings
          # old_url = "https://old/forum/#{row['forum_name']}/topics/#{row['post_id']}"
          new_url = "https://new/t/#{topic.slug}/#{topic.id}"
          insert_url_mapping(row['post_id'], new_url, title)
        rescue ActiveRecord::RecordInvalid => e
          puts "Error importing topic with post_id #{row['post_id']}: #{e.message}"
        end
      else
        puts "#3"
        # Find the root topic for the post
        root_topic_field = TopicCustomField.find_by(name: 'original_gossamer_id', value: row['post_root_id'])

        unless root_topic_field
          puts "Warning: Root topic not found for post_id #{row['post_id']} with post_root_id #{row['post_root_id']}"
          next
        end

        topic_id = root_topic_field.topic_id

        # Find the parent post for the reply
        parent_post_field = PostCustomField.find_by(name: 'original_gossamer_id', value: row['post_father_id'])
        reply_to_post_number = parent_post_field ? Post.find(parent_post_field.post_id).post_number : nil

        # Create the post in the existing topic
        begin
          puts "#4"
          post = Post.create!(
            topic_id: topic_id,
            user_id: discourse_user_id,
            # raw: import_attachments(row['post_message'], row['post_id']),
            # raw: row['post_message'] || "",
            raw: sanitize_post_message(row['post_message']),
            created_at: Time.at(row['post_time']),
            updated_at: Time.at(row['post_latest_reply']),
            reply_to_post_number: reply_to_post_number
          )
          post.custom_fields['original_gossamer_id'] = row['post_id']
          post.save!

          # Handle attachments for the post
          handle_post_attachments(row['post_id'], post, discourse_user_id)
        rescue ActiveRecord::RecordInvalid => e
          puts "Error importing post with post_id #{row['post_id']}: #{e.message}"
        end
      end
    end
  end

  # Import personal messages from gforum_Message table (both inbox and sent messages).
  #
  # Each message becomes a private-message topic with a single post, owned by
  # the sender, with the recipient added as an allowed user. Messages whose
  # sender or recipient has no mapping, or that were already imported, are skipped.
  def import_personal_messages
    puts "Importing personal (inbox and sendmail) messages..."
    execute_query("SELECT * FROM gforum_Message").each do |row|
      from_user_id = fetch_user_id_mapping(row['from_user_id_fk'])
      to_user_id = fetch_user_id_mapping(row['to_user_id_fk'])

      next unless from_user_id && to_user_id

      # Skip if the message already exists
      next if TopicCustomField.exists?(name: 'original_gossamer_msg_id', value: row['msg_id'])

      # Sanitize the message, ensuring we have an empty string or the content without any \0
      sanitized_message = row['msg_body']&.tr("\0", '') || ""

      # Set default message body if the sanitized message is blank
      sanitized_message = " " if sanitized_message.strip.empty?

      # # If we do not change the "min personal message post length" to 1, we need this.
      # sanitized_message = sanitized_message.ljust(10, ' ') if sanitized_message.length < 10

      # Normalize the title: a missing or whitespace-only subject becomes an empty string
      # NOTE(review): an empty title may be rejected by Discourse validation; such
      # messages are now reported and skipped below instead of aborting the import.
      title = row['msg_subject']&.strip
      title = "" if title.nil? || title.empty?

      puts "IMPORTING title #{row['msg_subject']} user_id #{from_user_id} to_user_id #{to_user_id}"

      begin
        # Create a private message topic in Discourse
        topic = Topic.create!(
          # title: row['msg_subject'],
          title: title,
          user_id: from_user_id,
          archetype: Archetype.private_message,
          created_at: Time.at(row['msg_time']),
          updated_at: Time.at(row['msg_time'])
        )
        topic.custom_fields['original_gossamer_msg_id'] = row['msg_id']
        topic.save!

        # Create the message as a post in the private topic
        post = Post.create!(
          topic_id: topic.id,
          user_id: from_user_id,
          # raw: row['msg_body'],
          raw: sanitized_message,
          created_at: Time.at(row['msg_time']),
          updated_at: Time.at(row['msg_time'])
        )
        post.custom_fields['original_gossamer_msg_id'] = row['msg_id']
        post.save!

        # Add recipient user to the private message topic
        topic.topic_allowed_users.create!(user_id: to_user_id)

        # handle_post_attachments(row['msg_id'], post, from_user_id)
      rescue ActiveRecord::RecordInvalid => e
        # Same policy as the topic/post import: report the record and carry on
        puts "Error importing personal message with msg_id #{row['msg_id']}: #{e.message}"
      end
    end
  end

  # Main method to perform the import
  def perform_import
    # Secret trick to disable RateLimiting protection in Discourse
    RateLimiter.disable

    # Set our unique timestamp for this migration run
    timestamp = Time.now.strftime("-%y%m%d%H%M%S")

    puts "Starting Gossamer Forums import... #{timestamp}"

    import_users
    export_username_mapping_to_csv("gossamer-migration-username-mapping#{timestamp}")

    import_categories

    import_topics_and_posts_with_attachments
    export_url_mapping_to_csv("gossamer-migration-url-mapping#{timestamp}")
    create_nginx_rewrite_rules("gossamer-redirects.conf")

    import_personal_messages

    puts "Gossamer Forums import complete! #{timestamp}"
  end

  private

  # Returns the post body in a form acceptable to Discourse: NUL characters are
  # removed, a trailing "[signature]" marker is stripped, and nil becomes "".
  def sanitize_post_message(message)
    cleaned = message&.tr("\0", '') || ""
    # String#sub returns a new string, so its result has to be the return value
    cleaned.sub(/\n?\[signature\]\n?\z/, '')
  end
end

GossamerForumsImporter.new.perform_import