# discourse-import_scripts/gossamer_forums.rb
#
# Import script that migrates users, categories, topics/posts and personal
# messages from a Gossamer Forums MySQL database into Discourse.

# Third-party / stdlib dependencies used by the importer.
require 'mysql2'
require 'open-uri'
require 'net/http'
require 'tempfile'
require 'sqlite3'

# Load the surrounding application environment and the shared importer base class.
require File.expand_path("../../../config/environment", __FILE__)
require_relative 'base'
class GossamerForumsImporter < ImportScripts::Base
# Sets up the importer: connects to the Gossamer Forums MySQL database and
# opens the SQLite store that maps Gossamer user IDs to Discourse user IDs.
#
# Connection settings come from the environment so that credentials are not
# committed with the script:
#   GOSSAMER_DB_HOST     (default "slowtwitch.northend.network")
#   GOSSAMER_DB_USER     (default "admin")
#   GOSSAMER_DB_PASSWORD (required, no default)
#   GOSSAMER_DB_NAME     (default "slowtwitch")
#
# Exits the process with status 1 when the password is missing or the
# connection cannot be established.
def initialize
  super

  begin
    # Initialize MySQL client to connect to Gossamer Forums database
    @mysql_client = Mysql2::Client.new(
      host: ENV.fetch("GOSSAMER_DB_HOST", "slowtwitch.northend.network"),
      username: ENV.fetch("GOSSAMER_DB_USER", "admin"),
      password: ENV.fetch("GOSSAMER_DB_PASSWORD"),
      database: ENV.fetch("GOSSAMER_DB_NAME", "slowtwitch")
    )
  rescue KeyError => e
    puts "Missing database configuration: #{e.message}"
    exit 1
  rescue Mysql2::Error => e
    puts "Error connecting to MySQL: #{e.message}"
    exit 1
  end

  # The old -> new user ID mapping is kept in SQLite (not in an in-memory hash).
  initialize_sqlite_user_id_db
end
# Opens the 'user_id_map.db' SQLite database (stored in @db) and makes sure
# the user_id_map table exists.
def initialize_sqlite_user_id_db
  create_table_sql = <<~SQL
    CREATE TABLE IF NOT EXISTS user_id_map (
    old_user_id INTEGER PRIMARY KEY,
    new_user_id INTEGER
    );
  SQL

  @db = SQLite3::Database.new('user_id_map.db')
  @db.execute(create_table_sql)
end
# Stores (or overwrites) the Discourse user ID for a Gossamer user ID.
#
# Bind parameters are passed as a single array: the sqlite3 gem deprecates
# passing them as separate trailing arguments to Database#execute.
def insert_user_id_mapping(old_user_id, new_user_id)
  @db.execute(
    "INSERT OR REPLACE INTO user_id_map (old_user_id, new_user_id) VALUES (?, ?)",
    [old_user_id, new_user_id]
  )
end
# Looks up the Discourse user ID recorded for a Gossamer user ID.
# Returns whatever @db.get_first_value yields for the lookup query.
def fetch_user_id_mapping(old_user_id)
  @db.get_first_value("SELECT new_user_id FROM user_id_map WHERE old_user_id = ?", old_user_id)
end
# Execute an SQL query on the Gossamer Forums database
# Runs +query+ through the MySQL client, asking for rows as hashes, and
# returns the client's result.
def execute_query(query)
  row_format = { as: :hash }
  @mysql_client.query(query, **row_format)
end
# Sanitize the username to meet Discourse's requirements
# Produces a username of at most 20 characters made of letters, digits,
# '.', '_' and '-'.
#
# username - original Gossamer username (nil is treated as "")
# email    - the user's email, used to recognise an already-imported user
# name     - the user's real name, used for the same purpose
#
# If a user with the sanitized name already exists and has the same email
# (case-insensitive) and name, that name is returned unchanged. Otherwise a
# numeric suffix ("_1", "_2", ...) is appended until the name is free.
# Returns the sanitized username.
def sanitize_username(username, email, name)
  max_length = 20
  original_username = username.to_s

  sanitized = original_username.gsub(/[^a-zA-Z0-9._-]/, '_')
  sanitized = "#{sanitized}." if sanitized.length < 2 # Pad so the name has at least two characters
  sanitized = sanitized[0, max_length]
  base = sanitized

  existing_user = User.find_by(username: sanitized)
  if existing_user
    same_person = existing_user.email.to_s.downcase == email.to_s.downcase && existing_user.name == name
    return sanitized if same_person

    counter = 1
    while User.exists?(username: sanitized)
      suffix = "_#{counter}"
      # Shorten the base rather than the suffix; truncating the suffix would
      # regenerate the same taken name forever for 19-20 character bases.
      sanitized = "#{base[0, max_length - suffix.length]}#{suffix}"
      counter += 1
    end
  end

  if original_username != sanitized
    puts "Sanitized username: '#{original_username}' --> '#{sanitized}'"
  end

  sanitized
end
# Sanitize email to replace restricted domains
# Rewrites the domain of an email address to 'example.org' when the domain
# is one of the restricted domains; other addresses are returned unchanged.
#
# Only the part after the last '@' is replaced (the local part is left
# alone even if it contains the domain text), and the domain comparison is
# case-insensitive. nil and strings without '@' are returned as given.
def sanitize_email(email)
  return email if email.nil?

  restricted_domains = %w[mailinator.com example.com] # Add more restricted domains as needed
  local_part, separator, domain = email.rpartition('@')
  return email if separator.empty?
  return email unless restricted_domains.include?(domain.downcase)

  sanitized_email = "#{local_part}@example.org" # Change to a permissible domain
  puts "Sanitized email: '#{email}' --> '#{sanitized_email}'"
  sanitized_email
end
# Helper method to download an image from a URL
# Downloads +url+ and returns the response body as a String, or nil when
# the download fails for any reason (a message is printed in that case).
#
# Only http/https URLs are fetched; anything else is rejected so that a
# malformed value can never be opened as a local path.
def download_image(url)
  uri = URI.parse(url.to_s)
  unless uri.is_a?(URI::HTTP) # URI::HTTPS is a subclass of URI::HTTP
    puts "Failed to download image from #{url}: not an HTTP(S) URL"
    return nil
  end

  # Block form closes the underlying IO/tempfile as soon as the body is read.
  uri.open(&:read)
rescue StandardError => e
  # Best effort: HTTP errors, invalid URLs, DNS/socket failures and timeouts
  # must not abort the whole import.
  puts "Failed to download image from #{url}: #{e.message}"
  nil
end
# Helper method to upload an image to Discourse
# Creates an upload owned by +user+ from the file at image_data.path.
#
# user       - object responding to #id and #username
# image_data - object responding to #path (e.g. a Tempfile), or nil
# filename   - original file name passed along to the upload
#
# Returns the persisted upload, or nil when image_data is nil or the upload
# could not be created/persisted.
def upload_image(user, image_data, filename)
  return if image_data.nil?

  # Block form guarantees the file handle is closed once the upload is created.
  # NOTE(review): confirm Upload.create_for exists in the targeted Discourse
  # version; the importer base class may offer an upload helper instead.
  upload = File.open(image_data.path) do |file|
    Upload.create_for(user.id, file, filename, 'image/jpeg')
  end

  if upload.nil? || !upload.persisted?
    puts "Failed to upload image for user #{user.username}"
    return
  end

  upload
end
# Import users from Gossamer Forums to Discourse
# Imports every row of gforum_User.
#
# Steps:
#   1. Build one attribute hash per Gossamer user (username sanitized).
#   2. Hand the list to create_users.
#   3. For each user found afterwards, record the old -> new ID mapping and
#      make sure the profile bio contains the imported "about" text, capped
#      at 3000 characters.
#
# The bio is only appended when it is not already part of the profile, so
# running the import again does not keep duplicating it.
def import_users
  puts "Importing users..."

  # Fetch all users from Gossamer Forums
  users = execute_query("SELECT * FROM gforum_User").map do |row|
    {
      id: row['user_id'],
      username: sanitize_username(row['user_username'], row['user_email'], row['user_real_name']),
      email: row['user_email'],
      created_at: Time.at(row['user_registered']),
      updated_at: Time.at(row['user_last_seen']),
      name: row['user_real_name'],
      title: row['user_title'],
      bio_raw: row['user_about'] || "",
      website: row['user_homepage'],
      location: row['user_location'],
      custom_fields: {
        md5_password: row['user_password'],
        original_username: row['user_username'],
        original_gossamer_id: row['user_id']
      }
    }
  end

  # Create or update users in Discourse
  create_users(users) { |user| user }

  # Now that the Discourse user IDs are known, store the ID mapping and
  # merge the imported bio into each profile.
  users.each do |user|
    discourse_username = sanitize_username(user[:username], user[:email], user[:name])
    discourse_user = User.find_by(username: discourse_username)

    if discourse_user.nil?
      puts "User #{user[:username]} --> #{discourse_username} not found in Discourse. Skipping file import."
      next
    end

    # Store the user ID mapping
    insert_user_id_mapping(user[:id], discourse_user.id)

    # Ensure user profile exists and bio_raw is a string
    discourse_user.user_profile ||= UserProfile.new(user_id: discourse_user.id)
    profile = discourse_user.user_profile
    existing_bio = profile.bio_raw || ""
    imported_bio = user[:bio_raw]

    merged_bio =
      if existing_bio.empty?
        imported_bio
      elsif imported_bio.empty? || existing_bio.include?(imported_bio)
        existing_bio # nothing new to add (e.g. a previous run already stored it)
      else
        "#{existing_bio}\n\n#{imported_bio}"
      end

    # Ensure the bio does not exceed 3000 characters
    if merged_bio.length > 3000
      puts "Warning: About Me for user #{discourse_user.username} (ID: #{discourse_user.id}) exceeds 3000 characters. Truncating."
      merged_bio = merged_bio[0, 3000]
    end

    profile.bio_raw = merged_bio
    profile.save!

    # Import user files
    # import_user_files(discourse_user)
  end
end
# Earlier draft of import_user_files (only links files from the bio), kept for reference:
# # Import user files from Gossamer Forums to Discourse
# def import_user_files(user)
# print "\rImporting files for user #{user.username}..."
#
# original_gossamer_id = user.custom_fields['original_gossamer_id']
# if original_gossamer_id.nil? || original_gossamer_id.empty?
# puts "User #{user.username} does not have a valid original_gossamer_id. Skipping file import."
# return
# end
#
# # puts "Original Gossamer ID for user #{user.username}: #{original_gossamer_id}"
#
# # Fetch and import user files
# execute_query("SELECT * FROM gforum_User_Files WHERE ForeignColKey = #{original_gossamer_id}").each do |file|
# # Construct the file URL
# file_url = "https://forum.slowtwitch.com/images/users/images/#{file['ID'] % 10}/#{file['ID']}-#{file['File_Name']}"
# puts "User #{user.username} User ID: #{user.id} original_gossamer_id: #{original_gossamer_id} file_url: #{file_url}"
#
# new_bio = user.user_profile.bio_raw + "\n\n![#{file['File_Name']}](#{file_url})"
# if new_bio.length > 3000
# puts "Warning: About Me for user #{user.username} (ID: #{user.id}) exceeds 3000 characters after adding file link. Truncating."
# new_bio = new_bio[0, 3000]
# end
# user.user_profile.bio_raw = new_bio
# user.user_profile.save!
# end
# print "Importing files for user #{user.username}... Done.\n"
# end
# Import user files (profile images) from Gossamer Forums to Discourse
# Imports the files listed in gforum_User_Files for +user+.
#
# The Gossamer user ID is read from the user's 'original_gossamer_id' custom
# field. Only rows whose ForeignColName looks like "user_image<N>" are used.
# The first image that downloads and uploads successfully becomes the user's
# avatar; every later image is appended to the profile bio as a Markdown
# image link.
#
# The custom-field value is converted to an Integer before it is placed in
# the SQL text, and the temporary file is always closed and removed, even
# when an iteration is skipped.
def import_user_files(user)
  print "\rImporting files for user #{user.username}..."

  original_gossamer_id = user.custom_fields['original_gossamer_id']
  # Base 10 is forced so a value such as "012" is not read as octal.
  gossamer_id = Integer(original_gossamer_id.to_s, 10, exception: false)
  if gossamer_id.nil?
    puts "User #{user.username} does not have a valid original_gossamer_id. Skipping file import."
    return
  end

  puts "Original Gossamer ID for user #{user.username}: #{original_gossamer_id}"

  images_imported = 0
  execute_query("SELECT * FROM gforum_User_Files WHERE ForeignColKey = #{gossamer_id}").each do |file|
    file_url = "https://forum.slowtwitch.com/images/users/images/#{file['ID'] % 10}/#{file['ID']}-#{file['File_Name']}"
    puts "User #{user.username} User ID: #{user.id} original_gossamer_id: #{original_gossamer_id} file_url: #{file_url}"

    next unless file['ForeignColName'].to_s.match?(/\Auser_image\d+\z/)

    image_data = download_image(file_url)
    next if image_data.nil?

    if images_imported == 0
      temp_file = Tempfile.new(['user_image', '.jpg'])
      begin
        temp_file.binmode
        temp_file.write(image_data)
        temp_file.rewind

        upload = upload_image(user, temp_file, file['File_Name'])
        next if upload.nil? # the ensure below still cleans up the temp file

        user.user_avatar = UserAvatar.create!(user_id: user.id, custom_upload_id: upload.id)
        user.save!
        images_imported += 1
      ensure
        temp_file.close
        temp_file.unlink
      end
    else
      user.user_profile.bio_raw ||= ""
      user.user_profile.bio_raw += "\n\n![#{file['File_Name']}](#{file_url})"
      user.user_profile.save!
    end
  end

  print "Importing files for user #{user.username}... Done.\n"
end
# Import categories from Gossamer Forums to Discourse
# Imports every row of gforum_Forum as a category.
#
# A forum is skipped when a category custom field 'original_gossamer_id'
# with its forum_id already exists. New categories are requested with
# id = forum_id + 10 (the same offset import_topics_and_posts applies to
# forum_id_fk) and are tagged with the 'original_gossamer_id' custom field.
def import_categories
  puts "Importing categories (forums)..."

  execute_query("SELECT * FROM gforum_Forum").each do |row|
    # Only create category if it does not exist
    next if CategoryCustomField.exists?(name: 'original_gossamer_id', value: row['forum_id'])

    timestamp = row['forum_last'] ? Time.at(row['forum_last']) : Time.now

    # Create category in Discourse
    category = create_category(
      {
        id: row['forum_id'] + 10,
        name: row['forum_name'],
        description: row['forum_desc'] || "No description provided",
        created_at: timestamp,
        updated_at: timestamp
      },
      row['forum_id'] # import_id argument
    )

    # Guard in case create_category hands back nothing for this forum.
    if category.nil?
      puts "Warning: category for forum_id #{row['forum_id']} could not be created. Skipping."
      next
    end

    category.custom_fields['original_gossamer_id'] = row['forum_id']
    category.save!
  end

  puts "Importing categories... Done."
end
# Helper function to ensure title meets the minimum length requirement
# Pads +title+ with trailing dots until it is at least +min_length+
# characters long. Titles that are already long enough are returned with
# the same content. A nil title is treated as an empty string.
def ensure_valid_title(title, min_length = 5)
  title.to_s.ljust(min_length, ".")
end
# Import topics and posts from Gossamer Forums to Discourse
# Walks gforum_Post in post_id order and imports each row.
#
# Rows whose author has no recorded Discourse user ID are skipped. A row
# with post_root_id == 0 starts a new topic (plus its first post); every
# other row becomes a reply in the topic created for its post_root_id.
# Both topics and replies are skipped when a record tagged with the same
# 'original_gossamer_id' already exists, so the import can be re-run.
def import_topics_and_posts
  puts "Importing topics and posts..."

  # Execute the query to get all posts ordered by post_id
  execute_query("SELECT * FROM gforum_Post ORDER BY post_id").each do |row|
    puts "post_id #{row['post_id']} post_root_id #{row['post_root_id']} post_subject/title #{row['post_subject']} forum_id_fk/category_id #{row['forum_id_fk']}"

    discourse_user_id = fetch_user_id_mapping(row['user_id_fk'])
    next unless discourse_user_id

    if row['post_root_id'] == 0
      import_gossamer_topic(row, discourse_user_id)
    else
      import_gossamer_reply(row, discourse_user_id)
    end
  end
end

# Creates the topic and its first post for a root Gossamer post row.
private def import_gossamer_topic(row, discourse_user_id)
  # Skip if the topic already exists
  return if TopicCustomField.exists?(name: 'original_gossamer_id', value: row['post_id'])

  # Ensure the title is valid (a missing subject is treated as empty)
  title = ensure_valid_title(row['post_subject'].to_s)

  topic = Topic.create!(
    title: title,
    user_id: discourse_user_id,
    created_at: Time.at(row['post_time']),
    updated_at: Time.at(row['post_latest_reply']),
    category_id: row['forum_id_fk'] + 10 # same offset import_categories uses
  )
  topic.custom_fields['original_gossamer_id'] = row['post_id']
  topic.save!

  # Create the initial post in the topic
  post = Post.create!(
    topic_id: topic.id,
    user_id: discourse_user_id,
    raw: import_post_attachments(row['post_message'], row['post_id']),
    created_at: Time.at(row['post_time']),
    updated_at: Time.at(row['post_latest_reply'])
  )
  post.custom_fields['original_gossamer_id'] = row['post_id']
  post.save!
rescue ActiveRecord::RecordInvalid => e
  puts "Error importing topic with post_id #{row['post_id']}: #{e.message}"
end

# Creates a reply post inside the topic that was imported for the row's root post.
private def import_gossamer_reply(row, discourse_user_id)
  # Skip replies imported by an earlier run
  return if PostCustomField.exists?(name: 'original_gossamer_id', value: row['post_id'])

  # Find the root topic for the post
  root_topic_field = TopicCustomField.find_by(name: 'original_gossamer_id', value: row['post_root_id'])
  if root_topic_field.nil?
    puts "Warning: Root topic not found for post_id #{row['post_id']} with post_root_id #{row['post_root_id']}"
    return
  end

  # Find the parent post for the reply; a missing parent just means no reply link
  parent_post_field = PostCustomField.find_by(name: 'original_gossamer_id', value: row['post_father_id'])
  parent_post = parent_post_field && Post.find_by(id: parent_post_field.post_id)
  reply_to_post_number = parent_post&.post_number

  # Create the post in the existing topic
  post = Post.create!(
    topic_id: root_topic_field.topic_id,
    user_id: discourse_user_id,
    raw: import_post_attachments(row['post_message'], row['post_id']),
    created_at: Time.at(row['post_time']),
    updated_at: Time.at(row['post_latest_reply']),
    reply_to_post_number: reply_to_post_number
  )
  post.custom_fields['original_gossamer_id'] = row['post_id']
  post.save!
rescue ActiveRecord::RecordInvalid => e
  puts "Error importing post with post_id #{row['post_id']}: #{e.message}"
end
# Earlier draft of import_topics_and_posts (ordered by post_root_id, post_time), kept for reference:
# Import topics and posts from Gossamer Forums to Discourse
# def import_topics_and_posts
# puts "Importing topics and posts..."
# execute_query("SELECT * FROM gforum_Post ORDER BY post_root_id, post_time").each do |row|
# puts "post_id #{row['post_id']} post_root_id #{row['post_root_id']} post_subject #{row['post_subject']}"

# Import attachments for a post
# Returns +post_message+ with one Markdown image link appended for every
# gforum_PostAttachment row that belongs to +post_id+.
#
# post_message - the post body; nil is treated as an empty body
# post_id      - Gossamer post ID; must be an integer (or integer string),
#                otherwise ArgumentError/TypeError is raised before any SQL
#                is built
#
# The caller's string is never modified.
def import_post_attachments(post_message, post_id)
  message = post_message.to_s.dup

  # Fetch attachments related to the post
  attachments = execute_query("SELECT * FROM gforum_PostAttachment WHERE post_id_fk = #{Integer(post_id)}")
  attachments.each do |attachment|
    # Append attachment links to the post message
    file_url = "https://forum.slowtwitch.com/images/posts/attachments/#{attachment['ID'] % 10}/#{attachment['ID']}-#{attachment['File_Name']}"
    message << "\n\n![#{attachment['File_Name']}](#{file_url})"
  end

  message
end
# Import personal messages (both inbox and sent messages)
# Announces the personal-message import, then runs the inbox import
# followed by the sent-message import. Returns the result of the latter.
def import_personal_messages
  puts "Importing personal messages..."
  %i[import_inbox_messages import_sent_messages].map { |step| send(step) }.last
end
# Import inbox messages from gforum_Message table
# Imports every row of gforum_Message as a private-message topic with a
# single post.
#
# Sender and recipient are translated from Gossamer user IDs to Discourse
# user IDs through fetch_user_id_mapping; a message is skipped (with a
# warning) when either side has no mapping. Messages already imported
# (topic custom field 'original_gossamer_msg_id') are skipped.
#
# NOTE(review): only the recipient is added to topic_allowed_users, as
# before — confirm whether the sender needs to be added as well.
def import_inbox_messages
  puts "Importing inbox messages..."

  execute_query("SELECT * FROM gforum_Message").each do |row|
    # Skip if the message already exists
    next if TopicCustomField.exists?(name: 'original_gossamer_msg_id', value: row['msg_id'])

    sender_id = fetch_user_id_mapping(row['from_user_id_fk'])
    recipient_id = fetch_user_id_mapping(row['to_user_id_fk'])
    if sender_id.nil? || recipient_id.nil?
      puts "Warning: skipping message #{row['msg_id']}: sender or recipient has no Discourse user mapping"
      next
    end

    sent_at = Time.at(row['msg_time'])

    # Create a private message topic in Discourse
    topic = Topic.create!(
      title: row['msg_subject'],
      user_id: sender_id,
      archetype: Archetype.private_message,
      created_at: sent_at,
      updated_at: sent_at
    )
    topic.custom_fields['original_gossamer_msg_id'] = row['msg_id']
    topic.save!

    # Create the message as a post in the private topic
    Post.create!(
      topic_id: topic.id,
      user_id: sender_id,
      raw: row['msg_body'],
      created_at: sent_at,
      updated_at: sent_at
    )

    # Add recipient user to the private message topic
    topic.topic_allowed_users.create!(user_id: recipient_id)
  end
end
# Import sent messages from gforum_SentMessage table
# Imports every row of gforum_SentMessage as a private-message topic with a
# single post.
#
# Sender and recipient are translated from Gossamer user IDs to Discourse
# user IDs through fetch_user_id_mapping; a message is skipped (with a
# warning) when either side has no mapping. Messages already imported
# (topic custom field 'original_gossamer_sent_msg_id') are skipped.
#
# NOTE(review): only the recipient is added to topic_allowed_users, as
# before — confirm whether the sender needs to be added as well.
def import_sent_messages
  puts "Importing sent messages..."

  execute_query("SELECT * FROM gforum_SentMessage").each do |row|
    # Skip if the message already exists
    next if TopicCustomField.exists?(name: 'original_gossamer_sent_msg_id', value: row['msg_id'])

    sender_id = fetch_user_id_mapping(row['from_user_id_fk'])
    recipient_id = fetch_user_id_mapping(row['to_user_id_fk'])
    if sender_id.nil? || recipient_id.nil?
      puts "Warning: skipping sent message #{row['msg_id']}: sender or recipient has no Discourse user mapping"
      next
    end

    sent_at = Time.at(row['msg_time'])

    # Create a private message topic in Discourse
    topic = Topic.create!(
      title: row['msg_subject'],
      user_id: sender_id,
      archetype: Archetype.private_message,
      created_at: sent_at,
      updated_at: sent_at
    )
    topic.custom_fields['original_gossamer_sent_msg_id'] = row['msg_id']
    topic.save!

    # Create the message as a post in the private topic
    Post.create!(
      topic_id: topic.id,
      user_id: sender_id,
      raw: row['msg_body'],
      created_at: sent_at,
      updated_at: sent_at
    )

    # Add recipient user to the private message topic
    topic.topic_allowed_users.create!(user_id: recipient_id)
  end
end
# Main method to perform the import
# Runs the import steps that are currently enabled (users, then
# categories) with rate limiting switched off. Rate limiting is switched
# back on afterwards, even when a step raises.
def perform_import
  RateLimiter.disable
  puts "Starting Gossamer Forums import..."

  import_users
  import_categories
  # import_topics_and_posts
  # import_personal_messages

  puts "Gossamer Forums import complete!"
ensure
  RateLimiter.enable
end
end
# Run the import when this script is executed.
GossamerForumsImporter.new.perform_import