Compare commits

..

4 Commits

Author SHA1 Message Date
008b9d74dd added suggestion for thread count tweaking 2024-08-15 12:58:03 -06:00
1811f7f3e0 index out of range issue fixed 2024-08-15 12:51:07 -06:00
6c85157c28 concurrency outline 2024-08-15 11:49:52 -06:00
349df7239f optimized main query for post import 2024-08-15 09:24:24 -06:00
4 changed files with 58 additions and 1240 deletions

View File

@ -1,9 +1,5 @@
# Federated Computer, Inc.
# David Sainty <saint@federated.computer> 2024 A.D.
# Gossamer Threads to Discourse -- CleanUp Script
# v0.14 Fix for Prod-Bitnami. Prep for 20240816 run.
require File.expand_path("../../../../config/environment", __FILE__)
# v0.13
require File.expand_path("../../../config/environment", __FILE__)
class GossamerForumsCleaner
def cleanup_users
@ -79,7 +75,7 @@ class GossamerForumsCleaner
# cleanup_messages
cleanup_posts
cleanup_topics
# cleanup_categories
cleanup_categories
# cleanup_users
puts "Cleanup complete!"
end

View File

@ -1,8 +1,8 @@
# Load the Discourse environment
require File.expand_path("../../../../config/environment", __FILE__)
require File.expand_path("../../../config/environment", __FILE__)
# Define usernames to exclude from deletion
excluded_usernames = ["saint", "discobot", "system","admin"]
excluded_usernames = ["saint", "discobot", "system"]
# Find all users except the excluded ones
users_to_delete = User.where.not(username: excluded_usernames)

View File

@ -1,7 +1,7 @@
# Federated Computer, Inc.
# David Sainty <saint@federated.computer> 2024 A.D.
# Gossamer Threads to Discourse -- Migration-Import Script
# v0.48 Further attempts to address MariaDB craziness.
# v0.36.7 Tweak for re-running as of 20240816
require 'mysql2'
require 'open-uri'
@ -16,11 +16,6 @@ require 'time'
require 'concurrent'
require 'sys/proctable'
require 'active_record'
require 'connection_pool'
require File.expand_path("../../../../config/environment", __FILE__)
# require_relative '../base'
require File.expand_path("../../../../script/import_scripts/base", __FILE__)
@ -29,18 +24,6 @@ class GossamerForumsImporter < ImportScripts::Base
def initialize
super
begin
# Database configuration for ActiveRecord
ActiveRecord::Base.establish_connection(
adapter: 'postgresql',
database: 'slowtwitch',
username: 'admin',
password: "yxnh93Ybbz2Nm8#mp28zCVv",
host: 'slowtwitch.northend.network',
pool: 20, # Adjust based on concurrency needs
timeout: 5000
)
# Initialize MySQL client to connect to Gossamer Forums database
@mysql_client = Mysql2::Client.new(
host: "slowtwitch.northend.network",
@ -59,16 +42,7 @@ class GossamerForumsImporter < ImportScripts::Base
end
def initialize_sqlite_id_name_url_db
@db = SQLite3::Database.new '/bitnami/discourse/sqlite/id_name_url_map.db'
###### ONLY when we need to clear the url_map and topic_import_status .... e.g. if reimporting topics-posts from scratch
# @db.execute <<-SQL
# DROP TABLE IF EXISTS url_map;
# SQL
# @db.execute <<-SQL
# DROP TABLE IF EXISTS topic_import_status;
# SQL
@db = SQLite3::Database.new 'id_name_url_map.db'
@db.execute <<-SQL
CREATE TABLE IF NOT EXISTS user_id_map (
old_user_id INTEGER PRIMARY KEY,
@ -90,6 +64,12 @@ class GossamerForumsImporter < ImportScripts::Base
real_name TEXT
);
SQL
# ONLY when we need to clear the url_map e.g. if reimporting topics-posts from scratch
# @db.execute <<-SQL
# DROP TABLE IF EXISTS url_map;
# SQL
@db.execute <<-SQL
CREATE TABLE IF NOT EXISTS url_map (
old_post_id INTEGER PRIMARY KEY,
@ -108,6 +88,7 @@ class GossamerForumsImporter < ImportScripts::Base
topic_id INTEGER PRIMARY KEY,
post_count INTEGER DEFAULT 0
);
SQL
@db.execute <<-SQL
CREATE TABLE IF NOT EXISTS user_topic_count (
@ -145,12 +126,6 @@ class GossamerForumsImporter < ImportScripts::Base
personal_id INTEGER
);
SQL
@db.execute <<-SQL
CREATE TABLE IF NOT EXISTS topic_import_status (
post_id INTEGER PRIMARY KEY,
status INTEGER DEFAULT 0
);
SQL
end
def insert_user_id_mapping(old_user_id, new_user_id)
@ -170,7 +145,7 @@ class GossamerForumsImporter < ImportScripts::Base
end
def insert_username_mapping(old_username, new_username, email, real_name)
@db.execute "INSERT OR REPLACE INTO username_map (old_username, new_username, email, real_name) VALUES (?, ?, ?, ?)", old_username, new_username, email, real_name
@db.execute "INSERT INTO username_map (old_username, new_username, email, real_name) VALUES (?, ?, ?, ?)", old_username, new_username, email, real_name
end
# Define a method to export the username mapping table to a CSV file
@ -189,7 +164,7 @@ class GossamerForumsImporter < ImportScripts::Base
# Insert a URL mapping into the SQLite database
def insert_url_mapping(old_post_id, new_url, title)
@db.execute "INSERT OR REPLACE INTO url_map (old_post_id, new_url, title) VALUES (?, ?, ?)", [old_post_id, new_url, title]
@db.execute "INSERT INTO url_map (old_post_id, new_url, title) VALUES (?, ?, ?)", [old_post_id, new_url, title]
end
# Export the URL mappings to a CSV file
@ -296,32 +271,11 @@ class GossamerForumsImporter < ImportScripts::Base
@db.execute "INSERT OR REPLACE INTO highest_processed_personal_id (id, personal_id) VALUES (1, ?)", personal_id
end
# Check if post_id exists and its status
# Looks up post_id in topic_import_status and returns its status as an
# Integer, or nil when no row exists for that post_id.
def post_status(post_id)
  rows = @db.execute("SELECT status FROM topic_import_status WHERE post_id = ?", post_id)
  status = rows.flatten.first
  status.to_i unless status.nil?
end
# Mark post_id as complete
# Upserts the topic_import_status row for post_id with status 1.
def mark_post_as_complete(post_id)
  upsert_sql = "INSERT OR REPLACE INTO topic_import_status (post_id, status) VALUES (?, 1)"
  @db.execute(upsert_sql, post_id)
end
# Mark post_id as failed
# Upserts the topic_import_status row for post_id with status 0, the same
# value post_status callers treat as "not yet imported".
def mark_post_as_failed(post_id)
  upsert_sql = "INSERT OR REPLACE INTO topic_import_status (post_id, status) VALUES (?, 0)"
  @db.execute(upsert_sql, post_id)
end
# Execute an SQL query on the Gossamer Forums database
# Runs query on the importer's own @mysql_client and returns the result set,
# asking the client for rows as hashes.
def execute_query(query)
  query_options = { as: :hash }
  @mysql_client.query(query, **query_options)
end
# Execute an SQL query on the Gossamer Forums database
# Same as execute_query, but runs query on the caller-supplied mysql_client
# instead of @mysql_client. Rows are requested as hashes.
def execute_query_concurrent(query, mysql_client)
  query_options = { as: :hash }
  mysql_client.query(query, **query_options)
end
# Sanitize the username to meet Discourse's requirements
def sanitize_username(original_username, email, real_name)
# original_username = username
@ -482,7 +436,6 @@ class GossamerForumsImporter < ImportScripts::Base
upload
rescue => e
puts "FAILURE: Failed to upload attachment #{filename} for user_id #{user_id}: #{e.message}"
puts e.backtrace.join("\n") # Print the full stack trace
nil
end
end
@ -498,15 +451,14 @@ class GossamerForumsImporter < ImportScripts::Base
upload
rescue => e
puts "Failed to upload attachment #{filename} for user_id #{user_id}: #{e.message}"
puts e.backtrace.join("\n") # Print the full stack trace
nil
end
end
# Helper method to handle post attachments
def handle_post_attachments(gossamer_post_id, post, user_id, mysql_client)
execute_query_concurrent("SELECT * FROM gforum_PostAttachment WHERE post_id_fk = #{gossamer_post_id}", mysql_client).each do |att_row|
def handle_post_attachments(gossamer_post_id, post, user_id)
execute_query("SELECT * FROM gforum_PostAttachment WHERE post_id_fk = #{gossamer_post_id}").each do |att_row|
attachment_url = "https://forum.slowtwitch.com/forum/?do=post_attachment;postatt_id=#{att_row['postatt_id']}"
puts "Handling attachment: #{attachment_url}"
attachment_data = download_attachment(attachment_url)
@ -552,7 +504,6 @@ class GossamerForumsImporter < ImportScripts::Base
# file
# rescue => e
# puts "Failed to download file from #{url}: #{e.message}"
# puts e.backtrace.join("\n") # Print the full stack trace
# nil
# end
# end
@ -596,7 +547,7 @@ class GossamerForumsImporter < ImportScripts::Base
# Import users from Gossamer Forums to Discourse
def import_users
puts "Importing Users..."
puts "Importing users..."
users = []
# Fetch all users from Gossamer Forums
@ -625,88 +576,22 @@ class GossamerForumsImporter < ImportScripts::Base
# insert_user_id_mapping(user[:id], user.id)
user
end
end
# Generate SQLite user ID mapping between Discourse and Gossamer
def generate_user_id_mapping
puts "Generating User ID Mapping..."
users = []
# Fetch all users from Gossamer Forums
execute_query("SELECT * FROM gforum_User").each do |row|
users << {
id: row['user_id'],
username: sanitize_username(row['user_username'], row['user_email'], row['user_real_name']),
email: row['user_email'],
created_at: Time.at(row['user_registered']),
updated_at: Time.at(row['user_last_seen']),
name: row['user_real_name'],
title: row['user_title'],
bio_raw: row['user_about'] || "",
website: row['user_homepage'],
location: row['user_location'],
custom_fields: {
md5_password: row['user_password'],
original_username: row['user_username'],
original_gossamer_id: row['user_id']
}
}
end
# For each user, add user ID mapping to SQLite now that we know what the Discourse user ID is
# For each user, add user ID mapping to SQLite now that we know what the Discourse user ID is, ... and append user bio and import user files
users.each do |user|
# discourse_username = sanitize_username(user[:username], user[:email], user[:name])
discourse_username = user[:username]
discourse_user = User.find_by(username: discourse_username)
if discourse_user.nil?
puts "User #{user[:username]} --> #{discourse_username} not found in Discourse. Skipping user mapping addition."
puts "User #{user[:username]} --> #{discourse_username} not found in Discourse. Skipping file import."
next
end
# Store the user ID mapping
# # Store the user ID mapping
# @user_id_map[user[:id]] = discourse_user.id
puts "for insert_user_id_mapping: user[:id] #{user[:id]} discourse_user.id #{discourse_user.id}"
insert_user_id_mapping(user[:id], discourse_user.id)
end
end
# Import and set user Bio and Images
def set_user_bio_images
puts "Setting User Bio and Images..."
users = []
# Fetch all users from Gossamer Forums
execute_query("SELECT * FROM gforum_User").each do |row|
users << {
id: row['user_id'],
username: sanitize_username(row['user_username'], row['user_email'], row['user_real_name']),
email: row['user_email'],
created_at: Time.at(row['user_registered']),
updated_at: Time.at(row['user_last_seen']),
name: row['user_real_name'],
title: row['user_title'],
bio_raw: row['user_about'] || "",
website: row['user_homepage'],
location: row['user_location'],
custom_fields: {
md5_password: row['user_password'],
original_username: row['user_username'],
original_gossamer_id: row['user_id']
}
}
end
# For each user, append user bio and import user files
users.each do |user|
# discourse_username = sanitize_username(user[:username], user[:email], user[:name])
discourse_username = user[:username]
discourse_user = User.find_by(username: discourse_username)
if discourse_user.nil?
puts "User #{user[:username]} --> #{discourse_username} not found in Discourse. Skipping bio-image setting."
next
end
# Ensure user profile exists and bio_raw is a string
discourse_user.user_profile ||= UserProfile.new(user_id: discourse_user.id)
@ -771,7 +656,6 @@ class GossamerForumsImporter < ImportScripts::Base
png_path if File.exist?(png_path)
rescue => e
puts "Failed to convert image #{file_path}: #{e.message}"
puts e.backtrace.join("\n") # Print the full stack trace
nil
end
end
@ -784,7 +668,6 @@ class GossamerForumsImporter < ImportScripts::Base
resized_path if File.exist?(resized_path)
rescue => e
puts "Failed to resize image #{file_path}: #{e.message}"
puts e.backtrace.join("\n") # Print the full stack trace
nil
end
end
@ -876,7 +759,6 @@ class GossamerForumsImporter < ImportScripts::Base
FileUtils.copy_file(resized_image_path, resized_temp_file.path)
rescue => e
puts "Skipping image due to resize failure: #{temp_file.path}"
puts e.backtrace.join("\n") # Print the full stack trace
temp_file.close
temp_file.unlink
next
@ -1045,446 +927,50 @@ class GossamerForumsImporter < ImportScripts::Base
result ? result['post_views'] : 0
end
########## THREADING START --------------------------------------------
#THREADING OUTLINE HERE --------------------------------------------
# Method to dynamically calculate the optimal thread pool size based on system load
# Derives a thread pool size from the current system load.
#
# Reads the 1-minute load average (first field of /proc/loadavg), divides the
# logical CPU count by it, rounds up, and clamps the result to the range
# 1..(2 * CPU count).
#
# Returns the pool size as an Integer.
def calculate_dynamic_pool_size
  # First whitespace-separated field of /proc/loadavg is the 1-minute average.
  one_minute_load_avg = File.read('/proc/loadavg').split.first.to_f

  # Number of logical CPU cores reported by concurrent-ruby.
  cpu_count = Concurrent.processor_count

  puts "1-minute Load Average: #{one_minute_load_avg}, CPU Count: #{cpu_count}"

  # Upper bound: never schedule more than two threads per logical CPU.
  max_pool_size = cpu_count * 2

  # An idle host reports a load of 0.00. Dividing by 0.0 yields Infinity and
  # Float#ceil on Infinity raises FloatDomainError, so floor the divisor.
  # Any load at or below 0.01 already saturates the cap (cpu_count / 0.01 is
  # far above 2 * cpu_count), so results for positive loads are unchanged.
  effective_load = [one_minute_load_avg, 0.01].max

  pool_size = (cpu_count / effective_load).ceil.clamp(1, max_pool_size)

  puts "Calculated and adjusted dynamic pool size: #{pool_size}"
  pool_size
end
# Get list of TOPICS / OP posts, i.e. post ids that have no parent / root id - SELECT post_id FROM gforum_Post WHERE post_root_id = 0;
def threaded_topic_import
# Get list of IDS that have no parent ID - SELECT post_id FROM gforum_Post WHERE post_root_id = 0;
parent_post_ids = execute_query("SELECT post_id FROM gforum_Post WHERE post_root_id = 0")
# Use CachedThreadPool for dynamic thread management
#### pool = Concurrent::CachedThreadPool.new
###### pool = Concurrent::FixedThreadPool.new(7)
pool = Concurrent::FixedThreadPool.new(12)
parent_post_count = parent_post_ids.count
batch_size = 100 #set our batch size
current_post_batch = 0 #set our current batch number
is_complete = false
# Define the connection pool inside the method
###### mariadb_pool = ConnectionPool.new(size: 14, timeout: 100) do
mariadb_pool = ConnectionPool.new(size: 24, timeout: 100) do
Mysql2::Client.new(
host: "slowtwitch.northend.network",
username: "admin",
password: "yxnh93Ybbz2Nm8#mp28zCVv",
database: "slowtwitch"
)
end
# The query selects post_ids from gforum_Post where post_root_id is 0, meaning these posts are the topic starters (OPs).
# Execute the query and fetch the result
# result = execute_query("SELECT post_id FROM gforum_Post WHERE post_root_id = 0 ORDER BY post_id ASC")
result = execute_query("SELECT post_id FROM gforum_Post WHERE post_root_id = 0 ORDER BY post_id ASC")
# Convert the result set to an array of post_ids
parent_post_ids = result.map { |row| row['post_id'] }
# parent_post_count = parent_post_ids.count
batch_size = 10 # Set our batch size for number of posts to import in a single batch
#### current_post_batch = 0 # Set our current batch number. This tracks the current batch of posts being processed.
is_complete = false # Flag to indicate whether the import process is complete.
# Mutex to control access to shared resources
### mutex = Mutex.new # Mutex for MySQL2 operations -- disabled as this may not in fact be necessary - TBD.
sqlite_mutex = Mutex.new # Mutex for SQLite opreations
# Run until all posts have been processed.
until is_complete
# Query in batches, create pool, wait for termination, do it again
# SELECT post_id FROM gforum_Post WHERE post_root_id = 0 ORDER BY post_id
current_post_batch_max = current_post_batch + batch_size
pool = Concurrent::FixedThreadPool.new(Concurrent.processor_count) #create thread pool that is bounded by processors avaialable, however play with the number to see what works best
# puts "QQ 11 -- GETTING NEXT BATCH ****************************************"
#### # Query in batches, create pool, wait for termination, do it again
#### current_post_batch_max = current_post_batch + batch_size
# Get the next batch of posts
current_post_batch = parent_post_ids.shift(batch_size)
break if current_post_batch.empty?
# Process each post in the current batch
current_post_batch.each do |post_id|
# puts "QQ 22 -- #{post_id}"
####### # Static pool size based on number of CPUs
# # pool = Concurrent::FixedThreadPool.new(Concurrent.processor_count) # Create a thread pool that is bounded by the processors available
# # pool = Concurrent::FixedThreadPool.new(8) # Create a thread pool of 8 pool members
#### # Dynamically calculate the pool size based on system load to optimise performance
#### pool_size = calculate_dynamic_pool_size # Dynamically calculate what the pool size "ought" to be.
#### pool = Concurrent::FixedThreadPool.new(pool_size) # Create a thread pool with the calculated size
#### # Process each post in the current batch
#### while current_post_batch < current_post_batch_max
#### post_id = parent_post_ids[current_post_batch] # Fetch the post_id for the current post
#### # Check if the post has already been processed or is incomplete
#### post_status = post_status(post_id)
# Submit the import job for the current post_id to the thread pool
while current_post_batch < current_post_batch_max
post_id = parent_post_ids[current_post_batch]
pool.post do
# Initialise a new MariaDB / Mysql2 client inside of each thread
#### mysql_client = Mysql2::Client.new(
#### host: "slowtwitch.northend.network",
#### username: "admin",
#### password: "yxnh93Ybbz2Nm8#mp28zCVv",
#### database: "slowtwitch"
#### )
#### puts "PP 22 -- #{post_id}"
topic_import_job(post_id)
end
current_post_batch += 1
break if current_post_batch >= parent_post_count
end
mariadb_pool.with do |mysql_client|
begin
pool.shutdown
pool.wait_for_termination
# Ensure the connection is active, otherwise reconnect
puts "PP 11 -- #{post_id} -- Checking MySQL connections status.."
mysql_client.ping || mysql_client = Mysql2::Client.new(
host: "slowtwitch.northend.network",
username: "admin",
password: "yxnh93Ybbz2Nm8#mp28zCVv",
database: "slowtwitch"
)
#### puts " FIRST Checking MySQL connection status..."
#### if mysql_client.query('SELECT 1').nil?
#### puts " MySQL connection is not valid"
#### else
#### puts " MySQL connection is valid"
#### end
# Use connection pooling for PostgreSQL and synchronize access to shared resources
ActiveRecord::Base.connection_pool.with_connection do
post_status = post_status(post_id)
if post_status.nil? || post_status == 0
puts "Starting import for post_id #{post_id}"
topic_import_job(post_id, sqlite_mutex, mysql_client) # Import topic and its replies
sqlite_mutex.synchronize do
mark_post_as_complete(post_id) # Mark as complete in SQLite table
end
else
puts "Skipping post_id #{post_id}, already processed."
end
end
rescue => e
puts "Error processing post ID #{post_id}: #{e.message}"
puts e.backtrace.join("\n") # Print the full stack trace
sqlite_mutex.synchronize do
mark_post_as_failed(post_id)
end
if e.message =~ /MySQL client is not connected/
puts "Reconnecting to MySQL for post ID #{post_id} due to connection loss..."
retry
end
#### ensure
#### # Ensure the MariaDB connection is closed after processing
#### mysql_client.close if mysql_client
#### puts "** CLOSED MariaDB client"
#### puts "PP 22 -- #{post_id}"
#### puts " FINAL Checking MySQL connection status..."
#### if mysql_client.query('SELECT 1').nil?
#### puts " MySQL connection is not valid"
#### else
#### puts " MySQL connection is valid"
#### end
if current_post_batch >= parent_post_count
is_complete = true
end
end
end
def topic_import_job(post_id)
#Here is where you can import the entire topic
#Get post -- SELECT post_id, user_id_fk, forum_id_fk, post_root_id, post_subject, post_time, post_message, post_father_id, post_replies FROM gforum_Post WHERE post_id = post_id
#check if exists, create if not
#get children, create -- SELECT post_id, user_id_fk, forum_id_fk, post_root_id, post_subject, post_time, post_message, post_father_id, post_replies FROM gforum_Post WHERE post_root_id = post_id
# this part needs to run synchronously to avoid race conditions
end
#### current_post_batch += 1 # Increment, moving to next post in the batch
#### break if current_post_batch >= parent_post_count
# Check if all posts have been processed
#### is_complete = true if current_post_batch >= parent_post_count
is_complete = parent_post_ids.empty?
end
# Wait for all jobs in the current batch to finish before proceeding
puts "PP 33 -- Ready for shutdown"
pool.shutdown # Initiate thread pool shutdown after all jobs submitted
puts "PP 44 -- Now wait for termination"
pool.wait_for_termination # Wait for all threads to finish exec
end
# # Method to ensure thread-safe updates to highest_processed_post_id
# def update_highest_processed_post_id_thread_safe(post_id)
# @highest_processed_mutex ||= Mutex.new
# @highest_processed_mutex.synchronize do
# if post_id > fetch_highest_processed_post_id
# update_highest_processed_post_id(post_id)
# end
# end
# end
# Method to import an entire topic, including its first post and all subsequent replies
# Imports one Gossamer topic into Discourse: the opening post identified by
# post_id, then every reply whose post_root_id equals it, in post_time order.
#
# post_id      - Gossamer post_id of the topic's opening post.
# sqlite_mutex - Mutex held around writes to the shared SQLite bookkeeping
#                tables (counters, last-post info, URL map).
# mysql_client - Mysql2 client used for every Gossamer query in this job.
#
# Returns nil early when the post row is missing or when the author or forum
# has no Discourse mapping. Topics that already carry a matching
# 'original_gossamer_id' custom field are skipped. All Discourse writes for a
# topic run inside one ActiveRecord transaction; ActiveRecord::RecordInvalid
# is logged and converted into a rollback.
def topic_import_job(post_id, sqlite_mutex, mysql_client)
  puts "TIJ ZZ post_id #{post_id}"

  puts " FIRST Checking MySQL connection status..."
  # NOTE(review): a dropped connection presumably makes #query raise rather
  # than return nil, so this branch may never run -- confirm against mysql2.
  if mysql_client.query('SELECT 1').nil?
    puts " MySQL connection is not valid, TRY TO RECONNECT II"
    mysql_client.ping || mysql_client = Mysql2::Client.new(
      host: "slowtwitch.northend.network",
      username: "admin",
      password: "yxnh93Ybbz2Nm8#mp28zCVv",
      database: "slowtwitch"
    )
  else
    puts " MySQL connection is valid"
  end
  puts "TIJ AA post_id #{post_id}"

  # Fetch the post data for the given post_id (the first post in the topic).
  row = execute_query_concurrent("SELECT post_id, user_id_fk, forum_id_fk, post_root_id, post_subject, post_time, post_message, post_father_id, post_replies FROM gforum_Post WHERE post_id = #{post_id}", mysql_client).first
  puts "TIJ BB post_id #{post_id}"

  # Nothing to import when the post row does not exist.
  return unless row
  puts "TIJ CC post_id #{post_id}"

  post_id = row['post_id'].to_i
  puts "Processing post_id #{row['post_id']} post_root_id #{row['post_root_id']} post_subject/title #{row['post_subject']} forum_id_fk/category_id #{row['forum_id_fk']}"

  # Map the Gossamer author and forum onto their Discourse counterparts.
  discourse_user_id = fetch_user_id_mapping(row['user_id_fk'])
  discourse_category_id = fetch_category_id_mapping(row['forum_id_fk'])
  puts "discourse_user_id #{discourse_user_id} discourse_category_id #{discourse_category_id}"
  return unless discourse_user_id && discourse_category_id
  puts "TIJ DD post_id #{post_id}"

  # Start from the validated title; a numeric suffix is appended below if
  # Discourse rejects it as a duplicate.
  title = ensure_valid_title(row['post_subject'])
  unique_title = title

  post_views = fetch_post_views(row['post_id'])

  if TopicCustomField.exists?(name: 'original_gossamer_id', value: row['post_id'])
    puts "Topic for post_id #{row['post_id']} already exists, skipping creation."
  else
    puts "TIJ EE post_id #{post_id}"
    ActiveRecord::Base.transaction do
      begin
        suffix = 1
        topic_created = false
        topic = nil
        # Retry topic creation with "title (n)" until the title is accepted.
        until topic_created
          begin
            puts "TIJ FF post_id #{post_id}"
            puts "CREATE TOPIC unique_title #{unique_title} title #{title} discourse_user_id #{discourse_user_id} category_id #{discourse_category_id}"
            topic = Topic.create!(
              title: unique_title,
              user_id: discourse_user_id,
              created_at: Time.at(row['post_time']),
              updated_at: Time.at(row['post_time']),
              category_id: discourse_category_id,
              views: post_views || 0,
              posts_count: 0
            )
            topic.custom_fields['original_gossamer_id'] = row['post_id']
            topic.save!
            topic_created = true
          rescue => e
            # Only the duplicate-title failure is retried; anything else propagates.
            raise unless e.message.include?("Title has already been used")

            unique_title = "#{title} (#{suffix})"
            suffix += 1
          end
        end

        # Keep a plain copy of the id for use throughout the rest of the job.
        current_topic_id = topic.id

        sqlite_mutex.synchronize do
          # Record the last post time and user for the topic.
          update_db_topic_last_post_time(current_topic_id, Time.at(row['post_time']).to_i)
          update_db_topic_last_post_user(current_topic_id, discourse_user_id)

          # Increment the topic count for the user.
          update_db_user_topic_count(discourse_user_id, fetch_db_user_topic_count(discourse_user_id).to_i + 1)
        end

        sanitized_post_message = sanitize_post_message(row['post_message'])

        puts "CREATE TOPIC POST for current_topic_id #{current_topic_id} discourse_user_id #{discourse_user_id}"

        # Next post number within the topic.
        post_number = fetch_db_topic_post_numbers(current_topic_id).to_i + 1
        sqlite_mutex.synchronize do
          update_db_topic_post_numbers(current_topic_id, post_number)
        end

        puts "TIJ GG post_id #{post_id}"

        # Create the initial post in the new topic.
        post = Post.create!(
          topic_id: current_topic_id,
          user_id: discourse_user_id,
          raw: sanitized_post_message,
          created_at: Time.at(row['post_time']),
          updated_at: Time.at(row['post_time']),
          reads: post_views || 0,
          post_number: post_number
        )
        post.custom_fields['original_gossamer_id'] = row['post_id']
        post.save!

        sqlite_mutex.synchronize do
          # Increment the post count for the topic and user.
          update_db_topic_post_count(current_topic_id, fetch_db_topic_post_count(current_topic_id).to_i + 1)
          update_db_user_post_count(discourse_user_id, fetch_db_user_post_count(discourse_user_id).to_i + 1)
        end

        puts "TIJ HH post_id #{post_id}"

        # Handle any attachments associated with the post.
        handle_post_attachments(row['post_id'], post, discourse_user_id, mysql_client)

        # Record the old-post-id -> new-URL mapping for the topic.
        new_url = "https://new/t/#{topic.slug}/#{current_topic_id}"
        sqlite_mutex.synchronize do
          insert_url_mapping(row['post_id'], new_url, unique_title)
        end

        # Fetch all replies to this topic, oldest first.
        replies = execute_query_concurrent("SELECT post_id, user_id_fk, post_message, post_time FROM gforum_Post WHERE post_root_id = #{post_id} ORDER BY post_time ASC", mysql_client)

        # Import each reply sequentially.
        replies.each do |reply_row|
          reply_user_id = fetch_user_id_mapping(reply_row['user_id_fk'])

          # Fall back to the 'Former_User' account only when the reply author
          # has no usable mapping. The previous `if reply_user_id = 0` was an
          # assignment, and 0 is truthy in Ruby, so it reattributed EVERY
          # reply to Former_User. nil (no mapping row) is treated like 0.
          if reply_user_id.to_i.zero?
            former_user = User.find_by(username: 'Former_User')
            reply_user_id = former_user.id
          end

          puts "TIJ II post_id #{post_id}"

          sanitized_reply_message = sanitize_post_message(reply_row['post_message'])

          puts "CREATE REPLY in current_topic_id #{current_topic_id} for reply post_id #{reply_row['post_id']}"

          # Next post number within the topic.
          post_number = fetch_db_topic_post_numbers(current_topic_id).to_i + 1
          sqlite_mutex.synchronize do
            update_db_topic_post_numbers(current_topic_id, post_number)
          end

          reply_post_views = fetch_post_views(reply_row['post_id'])

          # Sanity check carried over from the original; the reply is still
          # created because only current_topic_id is used below.
          if topic.nil?
            puts "ERROR: Topic is nil for reply post_id #{reply_row['post_id']}, attempting to BYPASS anyway"
          end
          puts "TIJ JJ post_id #{post_id} reply post_id #{reply_row['post_id']} reply_post_views #{reply_post_views || 0} post_number #{post_number} current_topic_id #{current_topic_id} reply_post_views #{reply_post_views || 0}"

          # Create the reply post in the existing topic.
          post = Post.create!(
            topic_id: current_topic_id,
            user_id: reply_user_id,
            raw: sanitized_reply_message,
            created_at: Time.at(reply_row['post_time']),
            updated_at: Time.at(reply_row['post_time']),
            reads: reply_post_views || 0,
            post_number: post_number
          )
          post.custom_fields['original_gossamer_id'] = reply_row['post_id']
          post.save!

          puts "TIJ KK post_id #{post_id}"

          # These read-modify-write updates touch per-user counters that other
          # import threads also update, so they are done under sqlite_mutex,
          # matching how the opening post's counters are handled above.
          sqlite_mutex.synchronize do
            # Increment the post count for the topic and user.
            update_db_topic_post_count(current_topic_id, fetch_db_topic_post_count(current_topic_id).to_i + 1)
            update_db_user_post_count(reply_user_id, fetch_db_user_post_count(reply_user_id).to_i + 1)

            # Update last post time and user for the topic when this reply is newer.
            if fetch_db_topic_last_post_time(current_topic_id).nil? || Time.at(reply_row['post_time']).to_i > fetch_db_topic_last_post_time(current_topic_id).to_i
              update_db_topic_last_post_time(current_topic_id, Time.at(reply_row['post_time']).to_i)
              update_db_topic_last_post_user(current_topic_id, reply_user_id)
            end
          end

          # Handle any attachments associated with the reply.
          handle_post_attachments(reply_row['post_id'], post, reply_user_id, mysql_client)
        end
      rescue ActiveRecord::RecordInvalid => e
        puts "Error importing topic with post_id #{row['post_id']}: #{e.message}"
        # Roll back the whole topic without propagating past the transaction.
        raise ActiveRecord::Rollback
      end
    end
  end

  # The client is not closed here; its lifetime is left to the caller.
  puts " LAST Removing MySQL connection"
end
########## THREADING END --------------------------------------------
#-------------------------------------------------------------------
# Import topics and posts from Gossamer Forums to Discourse
def import_topics_and_posts_with_attachments
@ -1507,7 +993,7 @@ class GossamerForumsImporter < ImportScripts::Base
# Attachment example: highest_processed_post_id = 1359862
# Execute the query to get all posts ordered by post_id
execute_query("SELECT post_id, user_id_fk, forum_id_fk, post_root_id, post_subject, post_time, post_message, post_father_id, post_likes, post_replies FROM gforum_Post ORDER BY post_id").each do |row|
execute_query("SELECT post_id, user_id_fk, forum_id_fk, post_root_id, post_subject, post_time, post_message, post_father_id, post_replies FROM gforum_Post ORDER BY post_id").each do |row|
post_id = row['post_id'].to_i
# Skip posts that have already been processed
@ -1943,21 +1429,15 @@ class GossamerForumsImporter < ImportScripts::Base
# add_former_user
# import_users
# generate_user_id_mapping
# export_username_mapping_to_csv("/bitnami/discourse/sqlite/gossamer-migration-username-mapping#{timestamp}")
## set_user_bio_images
# export_username_mapping_to_csv("gossamer-migration-username-mapping#{timestamp}")
#
# import_categories
####### import_topics_and_posts_with_attachments
threaded_topic_import
#
import_topics_and_posts_with_attachments
update_topic_stats
update_user_stats
export_url_mapping_to_csv("/bitnami/discourse/sqlite/gossamer-migration-url-mapping#{timestamp}")
create_nginx_rewrite_rules("/bitnami/discourse/sqlite/gossamer-redirects#{timestamp}.conf")
export_url_mapping_to_csv("gossamer-migration-url-mapping#{timestamp}")
create_nginx_rewrite_rules("gossamer-redirects#{timestamp}.conf")
import_personal_messages

658
gosss.rb
View File

@ -1,658 +0,0 @@
# gossamer threads migration-import code
# v0.12
require 'mysql2'
require 'open-uri'
require 'net/http'
require 'tempfile'
require 'sqlite3'
require 'digest'
require 'fileutils'
require 'csv'
require 'time'
require File.expand_path("../../../config/environment", __FILE__)
require_relative 'base'
class GossamerForumsImporter < ImportScripts::Base
# Sets up the importer: runs the base-class initializer, opens the MySQL
# connection to the Gossamer Forums database, and prepares the SQLite
# mapping database. Exits the process with status 1 if MySQL is unreachable.
#
# NOTE(review): the connection credentials are hard-coded in this file.
def initialize
  super

  # Connection settings for the Gossamer Forums database.
  gossamer_db_settings = {
    host: "slot.northend.network",
    username: "admin",
    password: "yxnh93Ybbz2Nm8#mp28zCVv",
    database: "slot"
  }

  begin
    @mysql_client = Mysql2::Client.new(gossamer_db_settings)
  rescue Mysql2::Error => e
    puts "Error connecting to MySQL: #{e.message}"
    exit(1)
  end

  # ID/name/URL mappings are kept in SQLite rather than an in-memory hash.
  initialize_sqlite_id_name_url_db
end
# Opens (or creates) the local SQLite file 'id_name_url_map.db' and makes sure
# the four mapping tables exist: user_id_map, category_id_map, username_map
# and url_map. The handle is stored in @db.
#
# The url_map definition used to contain a '#'-prefixed line; '#' does not
# start a comment in SQLite, so the statement failed to parse. The disabled
# column is now kept as a proper '--' SQL comment.
def initialize_sqlite_id_name_url_db
  @db = SQLite3::Database.new('id_name_url_map.db')

  @db.execute(<<~SQL)
    CREATE TABLE IF NOT EXISTS user_id_map (
      old_user_id INTEGER PRIMARY KEY,
      new_user_id INTEGER
    );
  SQL

  @db.execute(<<~SQL)
    CREATE TABLE IF NOT EXISTS category_id_map (
      old_category_id INTEGER PRIMARY KEY,
      new_category_id INTEGER
    );
  SQL

  @db.execute(<<~SQL)
    CREATE TABLE IF NOT EXISTS username_map (
      id INTEGER PRIMARY KEY,
      old_username TEXT,
      new_username TEXT,
      email TEXT,
      real_name TEXT
    );
  SQL

  @db.execute(<<~SQL)
    CREATE TABLE IF NOT EXISTS url_map (
      old_post_id INTEGER PRIMARY KEY,
      new_url TEXT,
      title TEXT
      -- created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    );
  SQL
end
# Records (or overwrites) the mapping from a Gossamer user ID to the
# corresponding Discourse user ID in the user_id_map table.
#
# Bind values are passed as a single array, the same form insert_url_mapping
# uses; the sqlite3 gem deprecates passing them as separate arguments.
def insert_user_id_mapping(old_user_id, new_user_id)
  @db.execute(
    "INSERT OR REPLACE INTO user_id_map (old_user_id, new_user_id) VALUES (?, ?)",
    [old_user_id, new_user_id]
  )
end
# Looks up the Discourse user ID stored in user_id_map for the given Gossamer
# user ID and returns whatever the SQLite handle's get_first_value yields.
def fetch_user_id_mapping(old_user_id)
  lookup_sql = "SELECT new_user_id FROM user_id_map WHERE old_user_id = ?"
  @db.get_first_value(lookup_sql, old_user_id)
end
# Records (or overwrites) the mapping from a Gossamer forum ID to the
# corresponding Discourse category ID in the category_id_map table.
#
# Bind values are passed as a single array, the same form insert_url_mapping
# uses; the sqlite3 gem deprecates passing them as separate arguments.
def insert_category_id_mapping(old_category_id, new_category_id)
  @db.execute(
    "INSERT OR REPLACE INTO category_id_map (old_category_id, new_category_id) VALUES (?, ?)",
    [old_category_id, new_category_id]
  )
end
# Looks up the Discourse category ID stored in category_id_map for the given
# Gossamer forum ID and returns whatever get_first_value yields.
def fetch_category_id_mapping(old_category_id)
  lookup_sql = "SELECT new_category_id FROM category_id_map WHERE old_category_id = ?"
  @db.get_first_value(lookup_sql, old_category_id)
end
# Appends one row to username_map recording that a Gossamer username was
# changed to a different Discourse username, together with the account's
# email and real name.
#
# Bind values are passed as a single array, the same form insert_url_mapping
# uses; the sqlite3 gem deprecates passing them as separate arguments.
def insert_username_mapping(old_username, new_username, email, real_name)
  @db.execute(
    "INSERT INTO username_map (old_username, new_username, email, real_name) VALUES (?, ?, ?, ?)",
    [old_username, new_username, email, real_name]
  )
end
# Define a method to export the username mapping table to a CSV file
# Writes every row of username_map to +filename+ as CSV, preceded by a header
# row, then prints a confirmation line.
def export_username_mapping_to_csv(filename)
  header = ['Old Username', 'New Username', 'Email', 'Full Name']
  select_sql = "SELECT old_username, new_username, email, real_name FROM username_map"

  CSV.open(filename, 'w') do |out|
    out << header
    # Rows are streamed straight from SQLite into the CSV file
    @db.execute(select_sql) { |record| out << record }
  end

  puts "Exported changed username mappings to #{filename}"
end
# Insert a URL mapping into the SQLite database
# Stores the new Discourse URL and title for an old Gossamer post ID.
#
# old_post_id is the table's primary key, so a plain INSERT raised a
# constraint error whenever the script was re-run against an existing
# id_name_url_map.db. INSERT OR REPLACE keeps the latest URL instead, matching
# the user and category mapping helpers.
def insert_url_mapping(old_post_id, new_url, title)
  @db.execute(
    "INSERT OR REPLACE INTO url_map (old_post_id, new_url, title) VALUES (?, ?, ?)",
    [old_post_id, new_url, title]
  )
end
# Export the URL mappings to a CSV file
# Writes every row of url_map to +filename+ as CSV, preceded by a header row,
# then prints a confirmation line.
def export_url_mapping_to_csv(filename)
  header = ["Old Post ID", "New URL", "Title"]
  select_sql = "SELECT old_post_id, new_url, title FROM url_map"

  CSV.open(filename, "w") do |out|
    out << header
    @db.execute(select_sql) { |record| out << record }
  end

  puts "Exported URL mappings to #{filename}"
end
# Method to create Nginx rewrite rules file
# Writes one permanent nginx rewrite rule per url_map row to +filename+,
# redirecting any /forum/ path ending in "P<old post id>/" to the new URL.
def create_nginx_rewrite_rules(filename)
  File.open(filename, "w") do |conf|
    @db.execute("SELECT old_post_id, new_url FROM url_map") do |(post_id, target_url)|
      conf.puts "rewrite ^/forum/.*P#{post_id}/$ #{target_url} permanent;"
    end
  end
end
# Execute an SQL query on the Gossamer Forums database
# Runs +query+ through the MySQL client created in the initializer, asking for
# each result row as a Hash, and returns the client's result.
def execute_query(query)
  result_options = { as: :hash }
  @mysql_client.query(query, **result_options)
end
# Sanitize the username to meet Discourse's requirements
# Derives a Discourse-compatible username from a Gossamer username.
#
# Characters other than letters, digits, '.', '_' and '-' become '_'; names
# shorter than two characters get a trailing '.'; the result is capped at 20
# characters. If a Discourse user already holds that name and is the same
# account (same email, case-insensitively, and same real name) the name is
# reused as is. Otherwise a numeric suffix "_1", "_2", ... is tried until a
# free name is found.
#
# Whenever the final name differs from the original, the change is logged and
# recorded through insert_username_mapping.
#
# Fixes over the previous version:
# * it read the undefined locals `username` and `name` instead of the
#   `original_username` / `real_name` parameters;
# * for a 20-character base the counter suffix was truncated away, so the
#   collision loop never terminated; the base is now shortened to make room;
# * a nil email on either side no longer raises.
#
# @param original_username [String] username as stored in Gossamer Forums
# @param email [String, nil] email address of the account being imported
# @param real_name [String, nil] real name of the account being imported
# @return [String] the username to use in Discourse
def sanitize_username(original_username, email, real_name)
  max_length = 20

  sanitized_username = original_username.gsub(/[^a-zA-Z0-9._-]/, '_')
  sanitized_username = "#{sanitized_username}." if sanitized_username.length < 2 # Allow two-character usernames
  sanitized_username = sanitized_username[0, max_length]
  base_username = sanitized_username

  existing_user = User.find_by(username: sanitized_username)
  if existing_user
    same_account = existing_user.email.to_s.downcase == email.to_s.downcase &&
                   existing_user.name == real_name
    return sanitized_username if same_account

    counter = 1
    while User.exists?(username: sanitized_username)
      suffix = "_#{counter}"
      # Trim the base, not the suffix, so every candidate is distinct
      sanitized_username = "#{base_username[0, max_length - suffix.length]}#{suffix}"
      counter += 1
    end
  end

  if original_username != sanitized_username
    # The Discourse username is not the same as the Gossamer Forums username
    puts "Sanitized username: '#{original_username}' --> '#{sanitized_username}'"
    insert_username_mapping(original_username, sanitized_username, email, real_name)
  end

  sanitized_username
end
# Sanitize email to replace restricted domains
# Replaces the domain of an address that uses a restricted domain with
# 'example.org' and logs the change; any other value is returned unchanged.
#
# Only the part after the last '@' is replaced (the previous gsub also
# rewrote a local part that happened to contain the domain text), the domain
# comparison ignores case, and nil or '@'-less input is passed through
# instead of raising or being rewritten.
#
# @param email [String, nil]
# @return [String, nil] the sanitized address, or +email+ itself if untouched
def sanitize_email(email)
  restricted_domains = ['mailinator.com', 'example.com'] # Add more restricted domains as needed
  local_part, separator, domain = email.to_s.rpartition('@')
  return email if separator.empty? || !restricted_domains.include?(domain.downcase)

  sanitized_email = "#{local_part}@example.org" # Change to a permissible domain
  puts "Sanitized email: '#{email}' --> '#{sanitized_email}'"
  sanitized_email
end
# Helper method to download an image from a URL
# Downloads +url+ and returns its body as a String, or nil on failure.
#
# The block form of URI.open closes the underlying handle as soon as the body
# has been read. Besides HTTP errors and malformed URLs, any other
# StandardError (DNS failure, refused connection, timeout, ...) is now logged
# and turned into nil as well, so one unreachable image cannot abort the
# whole import; callers already skip nil results.
#
# @param url [String]
# @return [String, nil]
def download_image(url)
  URI.open(url, &:read)
rescue OpenURI::HTTPError => e
  puts "Failed to download image from #{url}: #{e.message}"
  nil
rescue URI::InvalidURIError => e
  puts "Failed to handle invalid URL/URI for #{url}: #{e.message}"
  nil
rescue StandardError => e
  puts "Failed to download image from #{url}: #{e.class}: #{e.message}"
  nil
end
# Creates an Upload record owned by +user+ that points at +gossamer_url+,
# using +filename+ as the original filename and +file+'s size as the
# filesize. Returns the record, or nil (after logging) if anything raises.
def upload_image(user, file, filename, gossamer_url)
  attributes = {
    user_id: user.id,
    original_filename: filename,
    filesize: file.size,
    url: gossamer_url
  }
  record = Upload.create!(attributes)
  # The file itself is not moved into Discourse's store; only the record is saved
  record.save!
  record
rescue => e
  puts "Failed to upload image #{filename} for user #{user.username}: #{e.message}"
  nil
end
# def download_file(url)
# require 'open-uri'
# begin
# file = Tempfile.new
# file.binmode
# file.write(URI.open(url).read)
# file.rewind
# file
# rescue => e
# puts "Failed to download file from #{url}: #{e.message}"
# nil
# end
# end
# Helper method to upload an image to Discourse
# def upload_image(user, image_data, filename)
# return if image_data.nil?
#
# upload = Upload.create_for(user.id, File.open(image_data.path), filename, 'image/jpeg')
# if upload.nil? || !upload.persisted?
# puts "Failed to upload image for user #{user.username}"
# return
# end
#
# upload
# end
# Import users from Gossamer Forums to Discourse
# Imports every gforum_User row into Discourse.
#
# Steps:
# 1. Build one attribute hash per Gossamer user (username already sanitized).
# 2. Hand the hashes to create_users.
# 3. For each hash, look up the resulting Discourse user, store the old-ID ->
#    new-ID mapping, append the Gossamer "about" text to the profile bio
#    (capped at 3000 characters) and import the user's files.
#
# The previous post-processing loop named its block variable `discourse_user`
# but read `user[:id]`, and called record methods on the attribute hash, so it
# raised on the first user. The record is now looked up explicitly.
# NOTE(review): the lookup is by the sanitized username; if create_users can
# alter usernames further, confirm this still finds the right record.
def import_users
  puts "Importing users..."

  # Fetch all users from Gossamer Forums
  users = execute_query("SELECT * FROM gforum_User").map do |row|
    {
      id: row['user_id'],
      username: sanitize_username(row['user_username'], row['user_email'], row['user_real_name']),
      email: row['user_email'],
      created_at: Time.at(row['user_registered']),
      updated_at: Time.at(row['user_last_seen']),
      name: row['user_real_name'],
      title: row['user_title'],
      bio_raw: row['user_about'] || "",
      website: row['user_homepage'],
      location: row['user_location'],
      custom_fields: {
        md5_password: row['user_password'],
        original_username: row['user_username'],
        original_gossamer_id: row['user_id']
      }
    }
  end

  # Create or update users in Discourse
  create_users(users) do |user|
    user
  end

  # Now that Discourse IDs exist: store the ID mapping, append the bio and
  # import user files
  users.each do |user|
    discourse_user = User.find_by(username: user[:username])
    if discourse_user.nil?
      puts "User #{user[:username]} not found in Discourse. Skipping ID mapping and file import."
      next
    end

    puts "for insert_user_id_mapping: user[:id] #{user[:id]} discourse_user.id #{discourse_user.id}"
    insert_user_id_mapping(user[:id], discourse_user.id)

    # Ensure user profile exists and bio_raw is a string
    discourse_user.user_profile ||= UserProfile.new(user_id: discourse_user.id)
    profile = discourse_user.user_profile
    profile.bio_raw ||= ""

    if profile.bio_raw.empty?
      profile.bio_raw = user[:bio_raw]
    else
      profile.bio_raw += "\n\n" + user[:bio_raw]
    end

    # Ensure the bio does not exceed 3000 characters
    if profile.bio_raw.length > 3000
      puts "Warning: About Me for user #{discourse_user.username} (ID: #{discourse_user.id}) exceeds 3000 characters. Truncating."
      profile.bio_raw = profile.bio_raw[0, 3000]
    end
    profile.save!

    # Import user files
    import_user_files(discourse_user)
  end
end
# # Import user files from Gossamer Forums to Discourse
# def import_user_files(user)
# print "\rImporting files for user #{user.username}..."
#
# original_gossamer_id = user.custom_fields['original_gossamer_id']
# if original_gossamer_id.nil? || original_gossamer_id.empty?
# puts "User #{user.username} does not have a valid original_gossamer_id. Skipping file import."
# return
# end
#
# # puts "Original Gossamer ID for user #{user.username}: #{original_gossamer_id}"
#
# # Fetch and import user files
# execute_query("SELECT * FROM gforum_User_Files WHERE ForeignColKey = #{original_gossamer_id}").each do |file|
# # Construct the file URL
# file_url = "https://forum.slot.com/images/users/images/#{file['ID'] % 10}/#{file['ID']}-#{file['File_Name']}"
# puts "User #{user.username} User ID: #{user.id} original_gossamer_id: #{original_gossamer_id} file_url: #{file_url}"
#
# new_bio = user.user_profile.bio_raw + "\n\n![#{file['File_Name']}](#{file_url})"
# if new_bio.length > 3000
# puts "Warning: About Me for user #{user.username} (ID: #{user.id}) exceeds 3000 characters after adding file link. Truncating."
# new_bio = new_bio[0, 3000]
# end
# user.user_profile.bio_raw = new_bio
# user.user_profile.save!
# end
# print "Importing files for user #{user.username}... Done.\n"
# end
# Import user files (profile images) from Gossamer Forums to Discourse
# Imports a user's profile images from gforum_User_Files.
#
# For every file row whose ForeignColName looks like "user_imageN" and whose
# MIME type is JPEG or PNG, the image is downloaded. The first image that
# uploads successfully becomes the avatar, profile header and user-card
# background; every downloaded image is appended to the profile bio as a
# Markdown image link pointing at the original URL.
#
# Changes from the previous version:
# * the temp file is always closed and removed, including when the upload
#   fails and the row is skipped (it used to be left behind in that case);
# * the stored Gossamer ID is validated as an integer before it is
#   interpolated into the SQL text, and a non-String value no longer raises.
#
# @param user the Discourse user record; its 'original_gossamer_id' custom
#   field identifies the source account
def import_user_files(user)
  print "\rImporting files for user #{user.username}..."

  original_gossamer_id = user.custom_fields['original_gossamer_id']
  gossamer_id = Integer(original_gossamer_id.to_s, 10, exception: false)
  if gossamer_id.nil?
    puts "User #{user.username} does not have a valid original_gossamer_id. Skipping file import."
    return
  end
  puts "Original Gossamer ID for user #{user.username}: #{original_gossamer_id}"

  images_imported = 0
  execute_query("SELECT * FROM gforum_User_Files WHERE ForeignColKey = #{gossamer_id}").each do |file|
    file_url = "https://forum.slot.com/images/users/images/#{file['ID'] % 10}/#{file['ID']}-#{file['File_Name']}"
    puts "User #{user.username} User ID: #{user.id} original_gossamer_id: #{original_gossamer_id} file_url: #{file_url}"

    next unless file['ForeignColName'] =~ /^user_image\d+$/
    puts "#A"
    next unless ['image/jpeg', 'image/png'].include?(file['File_MimeType'])
    puts "#B"

    image_data = download_image(file_url)
    next if image_data.nil?
    puts "#C"

    temp_file = Tempfile.new(['user_image', File.extname(file['File_Name'])])
    begin
      temp_file.binmode
      temp_file.write(image_data)
      temp_file.rewind

      # Only the first usable image is uploaded and used as avatar/backgrounds
      if images_imported == 0
        puts "#D"
        upload = upload_image(user, temp_file, file['File_Name'], file_url)
        next if upload.nil?

        user.user_avatar = UserAvatar.create!(user_id: user.id, custom_upload_id: upload.id)
        user.save!

        # Set the Profile Header
        UserProfile.find_by(user_id: user.id).update!(profile_background_upload_id: upload.id)
        # Set the User Card Background
        UserProfile.find_by(user_id: user.id).update!(card_background_upload_id: upload.id)

        images_imported += 1
      end

      puts "#E"
      user.user_profile.bio_raw ||= ""
      user.user_profile.bio_raw += "\n\n![#{file['File_Name']}](#{file_url})"
      user.user_profile.save!
    ensure
      # Runs on normal completion, on `next`, and on exceptions
      temp_file.close
      temp_file.unlink
    end
  end

  print "Importing files for user #{user.username}... Done.\n"
end
# Import categories from Gossamer Forums to Discourse
# Creates one Discourse category per gforum_Forum row that has not been
# imported yet (detected via the 'original_gossamer_id' category custom
# field), tags the new category with that custom field and records the
# forum-ID -> category-ID mapping.
def import_categories
  puts "Importing categories (forums)..."

  execute_query("SELECT * FROM gforum_Forum").each do |forum|
    # Already imported on an earlier run
    next if CategoryCustomField.exists?(name: 'original_gossamer_id', value: forum['forum_id'])

    name = forum['forum_name']
    description = forum['forum_desc'] || "No description provided"
    puts "id #{forum['forum_id']} name #{name} description #{description}"

    # Evaluated once per timestamp field, as before
    last_activity = -> { forum['forum_last'] ? Time.at(forum['forum_last']) : Time.now }
    attributes = {
      name: name,
      description: description,
      created_at: last_activity.call,
      updated_at: last_activity.call
    }
    # Second argument is the import_id
    category = create_category(attributes, forum['forum_id'])

    category.custom_fields['original_gossamer_id'] = forum['forum_id']
    category.save!

    # Store the category ID mapping
    puts "for insert_category_id_mapping: category[:id] #{category[:id]} row['forum_id'] #{forum['forum_id']}"
    insert_category_id_mapping(forum['forum_id'], category[:id])
  end

  puts "Importing categories... Done."
end
# Helper function to ensure title meets the minimum length requirement
# Pads +title+ with trailing dots until it is at least +min_length+
# characters long; longer titles are returned unchanged in content.
# A nil title is treated as an empty string instead of raising.
#
# @param title [String, nil]
# @param min_length [Integer] minimum number of characters (default 5)
# @return [String]
def ensure_valid_title(title, min_length = 5)
  title.to_s.ljust(min_length, '.')
end
# Import topics and posts from Gossamer Forums to Discourse
# Imports every gforum_Post row, in post_id order, as a Discourse topic or
# reply.
#
# * Rows whose author or forum has no stored ID mapping are skipped.
# * A row with post_root_id == 0 starts a topic: the topic and its first post
#   are created, tagged with 'original_gossamer_id', and the new topic URL is
#   recorded via insert_url_mapping.
# * Any other row is a reply: it is attached to the topic imported for
#   post_root_id and, when the parent post was imported, linked to it through
#   reply_to_post_number.
#
# Replies that already carry an 'original_gossamer_id' post custom field are
# now skipped, as topics always were; previously a re-run created every reply
# a second time. A nil post_subject is treated as an empty title.
# NUL characters are stripped from post bodies before saving.
def import_topics_and_posts
  puts "Importing topics and posts..."

  # Execute the query to get all posts ordered by post_id
  execute_query("SELECT * FROM gforum_Post ORDER BY post_id").each do |row|
    puts "post_id #{row['post_id']} post_root_id #{row['post_root_id']} post_subject/title #{row['post_subject']} forum_id_fk/category_id #{row['forum_id_fk']}"

    discourse_user_id = fetch_user_id_mapping(row['user_id_fk'])
    discourse_category_id = fetch_category_id_mapping(row['forum_id_fk'])
    puts "discourse_user_id #{discourse_user_id} discourse_category_id #{discourse_category_id}"
    next unless discourse_user_id && discourse_category_id

    sanitized_post_message = row['post_message']&.tr("\0", '') || ""

    if row['post_root_id'] == 0
      puts "#1"
      # Ensure the title is valid
      title = ensure_valid_title(row['post_subject'].to_s)

      # Skip if the topic already exists
      next if TopicCustomField.exists?(name: 'original_gossamer_id', value: row['post_id'])

      begin
        puts "#2"
        puts "CREATE TOPIC title #{title} discourse_user_id #{discourse_user_id} category_id #{discourse_category_id}"
        topic = Topic.create!(
          title: title,
          user_id: discourse_user_id,
          created_at: Time.at(row['post_time']),
          updated_at: Time.at(row['post_latest_reply']),
          category_id: discourse_category_id
        )
        topic.custom_fields['original_gossamer_id'] = row['post_id']
        topic.save!

        # Create the initial post in the topic
        puts "CREATE POST topic.id #{topic.id} discourse_user_id #{discourse_user_id}"
        post = Post.create!(
          topic_id: topic.id,
          user_id: discourse_user_id,
          raw: sanitized_post_message,
          created_at: Time.at(row['post_time']),
          updated_at: Time.at(row['post_latest_reply'])
        )
        post.custom_fields['original_gossamer_id'] = row['post_id']
        post.save!

        # Create URL mappings
        new_url = "https://new/t/#{topic.slug}/#{topic.id}"
        insert_url_mapping(row['post_id'], new_url, title)
      rescue ActiveRecord::RecordInvalid => e
        puts "Error importing topic with post_id #{row['post_id']}: #{e.message}"
      end
    else
      puts "#3"
      # Skip replies imported by an earlier run
      next if PostCustomField.exists?(name: 'original_gossamer_id', value: row['post_id'])

      # Find the root topic for the post
      root_topic_field = TopicCustomField.find_by(name: 'original_gossamer_id', value: row['post_root_id'])
      unless root_topic_field
        puts "Warning: Root topic not found for post_id #{row['post_id']} with post_root_id #{row['post_root_id']}"
        next
      end

      # Find the parent post for the reply
      parent_post_field = PostCustomField.find_by(name: 'original_gossamer_id', value: row['post_father_id'])
      reply_to_post_number = parent_post_field ? Post.find(parent_post_field.post_id).post_number : nil

      # Create the post in the existing topic
      begin
        puts "#4"
        post = Post.create!(
          topic_id: root_topic_field.topic_id,
          user_id: discourse_user_id,
          raw: sanitized_post_message,
          created_at: Time.at(row['post_time']),
          updated_at: Time.at(row['post_latest_reply']),
          reply_to_post_number: reply_to_post_number
        )
        post.custom_fields['original_gossamer_id'] = row['post_id']
        post.save!
      rescue ActiveRecord::RecordInvalid => e
        puts "Error importing post with post_id #{row['post_id']}: #{e.message}"
      end
    end
  end
end
# Import personal messages from gforum_Message table (both inbox and sent messages)
# Imports every gforum_Message row as a Discourse private-message topic with
# a single post, and grants the recipient access to it.
#
# Rows whose sender or recipient has no stored ID mapping are skipped, as are
# messages already imported (detected via the 'original_gossamer_msg_id' topic
# custom field). NUL characters are stripped from the body and a blank body
# becomes a single space.
#
# The fallback title "<no subject>" was computed before but never used: the
# topic was created from the raw msg_subject, so the fallback had no effect.
# The stripped, defaulted title is now what gets saved.
def import_personal_messages
  puts "Importing personal (inbox and sendmail) messages..."

  execute_query("SELECT * FROM gforum_Message").each do |row|
    from_user_id = fetch_user_id_mapping(row['from_user_id_fk'])
    to_user_id = fetch_user_id_mapping(row['to_user_id_fk'])
    next unless from_user_id && to_user_id

    # Skip if the message already exists
    next if TopicCustomField.exists?(name: 'original_gossamer_msg_id', value: row['msg_id'])

    # Sanitize the message, ensuring we have an empty string or the content without any \0
    sanitized_message = row['msg_body']&.tr("\0", '') || ""
    # Set default message body if the sanitized message is blank
    sanitized_message = " " if sanitized_message.strip.empty?

    # Check and set a default title if the original title is nil or empty
    title = row['msg_subject']&.strip
    title = "<no subject>" if title.nil? || title.empty?

    puts "IMPORTING title #{title} user_id #{from_user_id} to_user_id #{to_user_id}"

    # Create a private message topic in Discourse
    topic = Topic.create!(
      title: title,
      user_id: from_user_id,
      archetype: Archetype.private_message,
      created_at: Time.at(row['msg_time']),
      updated_at: Time.at(row['msg_time'])
    )
    topic.custom_fields['original_gossamer_msg_id'] = row['msg_id']
    topic.save!

    # Create the message as a post in the private topic
    post = Post.create!(
      topic_id: topic.id,
      user_id: from_user_id,
      raw: sanitized_message,
      created_at: Time.at(row['msg_time']),
      updated_at: Time.at(row['msg_time'])
    )
    post.custom_fields['original_gossamer_msg_id'] = row['msg_id']
    post.save!

    # Add recipient user to the private message topic
    topic.topic_allowed_users.create!(user_id: to_user_id)
  end
end
# Import attachments for a post
# Returns +post_message+ with one Markdown image link appended for each
# gforum_PostAttachment row belonging to +post_id+.
#
# The method used to end in the stray expression `1# post_message`, so it
# always returned the integer 1 and the links were lost. It now returns the
# extended message. The caller's string is not modified, a nil message is
# treated as empty, and post_id is coerced to an integer before it is placed
# in the SQL text.
#
# @param post_message [String, nil] original post body
# @param post_id [Integer] Gossamer post ID
# @return [String] the post body followed by the attachment links
def import_post_attachments(post_message, post_id)
  message = post_message.to_s.dup

  # Fetch attachments related to the post
  attachments = execute_query("SELECT * FROM gforum_PostAttachment WHERE post_id_fk = #{post_id.to_i}")
  attachments.each do |attachment|
    # Append attachment links to the post message
    file_url = "https://forum.slot.com/images/posts/attachments/#{attachment['ID'] % 10}/#{attachment['ID']}-#{attachment['File_Name']}"
    message << "\n\n![#{attachment['File_Name']}](#{file_url})"
  end

  message
end
# Main method to perform the import
# Runs the whole migration in order: users, username-mapping export,
# categories, topics and posts, URL-mapping export, nginx redirect rules and
# finally personal messages. Rate limiting is switched off first, and a
# per-run timestamp suffix is used in the two export file names.
def perform_import
  # Secret trick to disable RateLimiting protection in Discourse
  RateLimiter.disable

  # Unique suffix identifying this migration run
  run_stamp = Time.now.strftime("-%y%m%d%H%M%S")
  username_export = "gossamer-migration-username-mapping#{run_stamp}"
  url_export = "gossamer-migration-url-mapping#{run_stamp}"

  puts "Starting Gossamer Forums import... #{run_stamp}"

  import_users
  export_username_mapping_to_csv(username_export)

  import_categories
  import_topics_and_posts

  export_url_mapping_to_csv(url_export)
  create_nginx_rewrite_rules("gossamer-redirects.conf")

  import_personal_messages

  puts "Gossamer Forums import complete! #{run_stamp}"
end
end
# Script entry point: build the importer (which connects to MySQL and opens
# the SQLite mapping store in its initializer) and run the full migration.
GossamerForumsImporter.new.perform_import