2024-08-14 06:28:44 +00:00
# Federated Computer, Inc.
# David Sainty <saint@federated.computer> 2024 A.D.
# Gossamer Threads to Discourse -- Migration-Import Script
2024-08-16 17:06:07 +00:00
# v0.39.7 Fixes/tweaks in calculate_dynamic_pool_size
2024-06-19 15:13:08 +00:00
2024-06-17 09:32:40 +00:00
require 'mysql2'
2024-06-17 09:35:50 +00:00
require 'open-uri'
require 'net/http'
2024-06-17 09:36:54 +00:00
require 'tempfile'
require 'sqlite3'
2024-06-17 09:35:50 +00:00
2024-06-19 15:13:08 +00:00
require 'digest'
require 'fileutils'
require 'csv'
2024-06-22 05:13:41 +00:00
require 'time'
2024-06-19 15:13:08 +00:00
2024-08-15 17:49:52 +00:00
require 'concurrent'
2024-08-16 15:06:25 +00:00
require 'sys/proctable'
2024-06-26 05:26:26 +00:00
require File . expand_path ( " ../../../../config/environment " , __FILE__ )
2024-08-14 06:28:44 +00:00
# require_relative '../base'
require File . expand_path ( " ../../../../script/import_scripts/base " , __FILE__ )
2024-06-17 09:32:40 +00:00
class GossamerForumsImporter < ImportScripts :: Base
  # Set up the importer: connect to the source Gossamer Forums MySQL database
  # and open/create the local SQLite database that persists ID/name/URL
  # mappings between runs of the migration.
  def initialize
    super
    begin
      # Initialize MySQL client to connect to Gossamer Forums database
      # NOTE(review): credentials are hard-coded here; consider moving them
      # into environment variables before this script is shared further.
      @mysql_client = Mysql2::Client.new(
        host: "slowtwitch.northend.network",
        username: "admin",
        password: "yxnh93Ybbz2Nm8#mp28zCVv",
        database: "slowtwitch"
      )
    rescue Mysql2::Error => e
      puts "Error connecting to MySQL: #{e.message}"
      exit 1
    end
    # # Create a mapping of old Gossamer user IDs to new Discourse user IDs
    # @user_id_map = {}
    initialize_sqlite_id_name_url_db
  end
2024-06-19 15:13:08 +00:00
def initialize_sqlite_id_name_url_db
2024-08-15 16:41:35 +00:00
@db = SQLite3 :: Database . new '/bitnami/discourse/sqlite/id_name_url_map.db'
2024-06-17 09:36:54 +00:00
@db . execute <<-SQL
CREATE TABLE IF NOT EXISTS user_id_map (
old_user_id INTEGER PRIMARY KEY ,
new_user_id INTEGER
) ;
SQL
2024-06-17 09:37:13 +00:00
@db . execute <<-SQL
CREATE TABLE IF NOT EXISTS category_id_map (
old_category_id INTEGER PRIMARY KEY ,
new_category_id INTEGER
) ;
SQL
2024-06-19 15:13:08 +00:00
@db . execute <<-SQL
CREATE TABLE IF NOT EXISTS username_map (
id INTEGER PRIMARY KEY ,
old_username TEXT ,
new_username TEXT ,
email TEXT ,
real_name TEXT
) ;
SQL
2024-06-28 14:42:23 +00:00
# ONLY when we need to clear the url_map e.g. if reimporting topics-posts from scratch
2024-07-01 03:00:12 +00:00
# @db.execute <<-SQL
# DROP TABLE IF EXISTS url_map;
# SQL
2024-06-28 14:42:23 +00:00
2024-06-28 08:56:38 +00:00
@db . execute <<-SQL
CREATE TABLE IF NOT EXISTS url_map (
2024-06-20 09:23:30 +00:00
old_post_id INTEGER PRIMARY KEY ,
2024-06-20 03:28:14 +00:00
new_url TEXT ,
title TEXT
) ;
SQL
2024-06-28 12:25:26 +00:00
@db . execute <<-SQL
CREATE TABLE IF NOT EXISTS topic_last_post_time (
topic_id INTEGER PRIMARY KEY ,
last_post_time INTEGER
) ;
SQL
@db . execute <<-SQL
CREATE TABLE IF NOT EXISTS topic_post_count (
topic_id INTEGER PRIMARY KEY ,
post_count INTEGER DEFAULT 0
) ;
SQL
@db . execute <<-SQL
CREATE TABLE IF NOT EXISTS user_topic_count (
user_id INTEGER PRIMARY KEY ,
topic_count INTEGER DEFAULT 0
) ;
SQL
@db . execute <<-SQL
CREATE TABLE IF NOT EXISTS user_post_count (
user_id INTEGER PRIMARY KEY ,
post_count INTEGER DEFAULT 0
) ;
SQL
@db . execute <<-SQL
CREATE TABLE IF NOT EXISTS topic_last_post_user (
topic_id INTEGER PRIMARY KEY ,
user_id INTEGER
) ;
SQL
@db . execute <<-SQL
CREATE TABLE IF NOT EXISTS topic_post_numbers (
topic_id INTEGER PRIMARY KEY ,
post_number INTEGER DEFAULT 0
) ;
SQL
2024-07-01 06:08:07 +00:00
@db . execute <<-SQL
CREATE TABLE IF NOT EXISTS highest_processed_post_id (
id INTEGER PRIMARY KEY CHECK ( id = 1 ) ,
post_id INTEGER
) ;
SQL
2024-07-04 15:28:39 +00:00
@db . execute <<-SQL
CREATE TABLE IF NOT EXISTS highest_processed_personal_id (
id INTEGER PRIMARY KEY CHECK ( id = 1 ) ,
personal_id INTEGER
) ;
SQL
2024-08-16 15:06:25 +00:00
@db . execute <<-SQL
CREATE TABLE IF NOT EXISTS topic_import_status (
post_id INTEGER PRIMARY KEY ,
status INTEGER DEFAULT 0
) ;
SQL
2024-06-17 09:36:54 +00:00
end
def insert_user_id_mapping ( old_user_id , new_user_id )
@db . execute " INSERT OR REPLACE INTO user_id_map (old_user_id, new_user_id) VALUES (?, ?) " , old_user_id , new_user_id
end
def fetch_user_id_mapping ( old_user_id )
@db . get_first_value " SELECT new_user_id FROM user_id_map WHERE old_user_id = ? " , old_user_id
2024-06-17 09:32:40 +00:00
end
2024-06-17 09:37:13 +00:00
def insert_category_id_mapping ( old_category_id , new_category_id )
@db . execute " INSERT OR REPLACE INTO category_id_map (old_category_id, new_category_id) VALUES (?, ?) " , old_category_id , new_category_id
end
def fetch_category_id_mapping ( old_category_id )
@db . get_first_value " SELECT new_category_id FROM category_id_map WHERE old_category_id = ? " , old_category_id
end
2024-06-19 15:13:08 +00:00
def insert_username_mapping ( old_username , new_username , email , real_name )
2024-08-16 15:57:32 +00:00
@db . execute " INSERT OR REPLACE INTO username_map (old_username, new_username, email, real_name) VALUES (?, ?, ?, ?) " , old_username , new_username , email , real_name
2024-06-19 15:13:08 +00:00
end
# Define a method to export the username mapping table to a CSV file
def export_username_mapping_to_csv ( filename )
2024-06-20 09:23:30 +00:00
CSV . open ( filename , 'w' ) do | csv |
2024-06-19 15:13:08 +00:00
# Add headers
csv << [ 'Old Username' , 'New Username' , 'Email' , 'Full Name' ]
# Fetch data from the database
@db . execute ( " SELECT old_username, new_username, email, real_name FROM username_map " ) do | row |
csv << row
end
end
2024-06-20 03:28:14 +00:00
puts " Exported changed username mappings to #{ filename } "
end
# Insert a URL mapping into the SQLite database
2024-06-20 09:23:30 +00:00
def insert_url_mapping ( old_post_id , new_url , title )
2024-06-28 08:56:38 +00:00
@db . execute " INSERT INTO url_map (old_post_id, new_url, title) VALUES (?, ?, ?) " , [ old_post_id , new_url , title ]
2024-06-20 03:28:14 +00:00
end
# Export the URL mappings to a CSV file
def export_url_mapping_to_csv ( filename )
2024-06-20 09:23:30 +00:00
CSV . open ( filename , " w " ) do | csv |
2024-06-20 03:28:14 +00:00
# Add headers
2024-06-20 09:23:30 +00:00
csv << [ " Old Post ID " , " New URL " , " Title " ]
2024-06-28 08:56:38 +00:00
@db . execute ( " SELECT old_post_id, new_url, title FROM url_map " ) do | row |
2024-06-20 03:28:14 +00:00
csv << row
end
end
puts " Exported URL mappings to #{ filename } "
2024-06-19 15:13:08 +00:00
end
2024-06-20 09:23:30 +00:00
# Method to create Nginx rewrite rules file
def create_nginx_rewrite_rules ( filename )
File . open ( filename , " w " ) do | file |
2024-06-28 08:56:38 +00:00
@db . execute ( " SELECT old_post_id, new_url FROM url_map " ) do | row |
2024-06-20 09:23:30 +00:00
old_post_id , new_url = row
file . puts " rewrite ^/forum/.*P #{ old_post_id } /$ #{ new_url } permanent; "
end
end
end
2024-06-19 15:13:08 +00:00
2024-07-01 06:08:07 +00:00
# Fetch the highest old_post_id from the url_map table
def fetch_highest_old_post_id
@db . get_first_value " SELECT MAX(old_post_id) FROM url_map "
end
2024-06-28 12:25:26 +00:00
  # Helper methods to interact with the SQLite database for persisting topic-post related values

  # Generic key/value read from `table`.
  # NOTE(review): none of the tables created in initialize_sqlite_id_name_url_db
  # have `key`/`value` columns, so these two generic helpers appear stale —
  # the typed fetch_db_*/update_db_* methods below are the live code path.
  # Also `table` is interpolated directly into the SQL; only ever call these
  # with hard-coded, trusted table names.
  def fetch_db(table, key)
    @db.get_first_value "SELECT value FROM #{table} WHERE key = ?", key
  end

  # Generic key/value upsert into `table` (same caveats as fetch_db).
  def update_db(table, key, value)
    @db.execute "INSERT OR REPLACE INTO #{table} (key, value) VALUES (?, ?)", key, value
  end
2024-06-28 16:17:30 +00:00
def fetch_db_topic_last_post_time ( topic_id )
@db . get_first_value " SELECT last_post_time FROM topic_last_post_time WHERE topic_id = ? " , topic_id
end
def fetch_db_topic_last_post_user ( topic_id )
@db . get_first_value " SELECT user_id FROM topic_last_post_user WHERE topic_id = ? " , topic_id
end
def fetch_db_topic_post_count ( topic_id )
@db . get_first_value " SELECT post_count FROM topic_post_count WHERE topic_id = ? " , topic_id
end
def fetch_db_user_topic_count ( user_id )
@db . get_first_value " SELECT topic_count FROM user_topic_count WHERE user_id = ? " , user_id
end
def fetch_db_user_post_count ( user_id )
@db . get_first_value " SELECT post_count FROM user_post_count WHERE user_id = ? " , user_id
end
def fetch_db_topic_post_numbers ( topic_id )
@db . get_first_value " SELECT post_number FROM topic_post_numbers WHERE topic_id = ? " , topic_id
end
def update_db_topic_last_post_time ( topic_id , last_post_time )
@db . execute " INSERT OR REPLACE INTO topic_last_post_time (topic_id, last_post_time) VALUES (?, ?) " , topic_id , last_post_time
end
def update_db_topic_last_post_user ( topic_id , user_id )
@db . execute " INSERT OR REPLACE INTO topic_last_post_user (topic_id, user_id) VALUES (?, ?) " , topic_id , user_id
end
def update_db_topic_post_count ( topic_id , post_count )
@db . execute " INSERT OR REPLACE INTO topic_post_count (topic_id, post_count) VALUES (?, ?) " , topic_id , post_count
end
def update_db_user_topic_count ( user_id , topic_count )
@db . execute " INSERT OR REPLACE INTO user_topic_count (user_id, topic_count) VALUES (?, ?) " , user_id , topic_count
end
def update_db_user_post_count ( user_id , post_count )
@db . execute " INSERT OR REPLACE INTO user_post_count (user_id, post_count) VALUES (?, ?) " , user_id , post_count
end
def update_db_topic_post_numbers ( topic_id , post_number )
@db . execute " INSERT OR REPLACE INTO topic_post_numbers (topic_id, post_number) VALUES (?, ?) " , topic_id , post_number
end
2024-07-01 06:08:07 +00:00
# Fetch the highest processed post_id from the highest_processed_post_id table
def fetch_highest_processed_post_id
@db . get_first_value " SELECT post_id FROM highest_processed_post_id WHERE id = 1 "
end
# Update the highest processed post_id in the highest_processed_post_id table
def update_highest_processed_post_id ( post_id )
@db . execute " INSERT OR REPLACE INTO highest_processed_post_id (id, post_id) VALUES (1, ?) " , post_id
end
2024-07-04 15:28:39 +00:00
# Fetch the highest processed personal_id from the highest_processed_personal_id table
def fetch_highest_processed_personal_id
@db . get_first_value " SELECT personal_id FROM highest_processed_personal_id WHERE id = 1 "
end
# Update the highest processed personal_id in the highest_processed_personal_id table
def update_highest_processed_personal_id ( personal_id )
@db . execute " INSERT OR REPLACE INTO highest_processed_personal_id (id, personal_id) VALUES (1, ?) " , personal_id
end
2024-06-28 16:17:30 +00:00
2024-08-16 15:06:25 +00:00
# Check if post_id exists and its status
def post_status ( post_id )
result = @db . execute ( " SELECT status FROM topic_import_status WHERE post_id = ? " , post_id ) . flatten . first
result . nil? ? nil : result . to_i
end
# Mark post_id as complete
def mark_post_as_complete ( post_id )
@db . execute ( " INSERT OR REPLACE INTO topic_import_status (post_id, status) VALUES (?, 1) " , post_id )
end
# Mark post_id as failed
def mark_post_as_failed ( post_id )
@db . execute ( " INSERT OR REPLACE INTO topic_import_status (post_id, status) VALUES (?, 0) " , post_id )
end
2024-06-17 09:35:50 +00:00
  # Execute an SQL query on the Gossamer Forums database.
  # Returns a Mysql2::Result yielding Hash rows (column name => value).
  # NOTE(review): callers interpolate IDs straight into the SQL string; this
  # is only acceptable because those IDs come from the database itself, never
  # from user input.
  def execute_query(query)
    @mysql_client.query(query, as: :hash)
  end
2024-06-17 09:35:50 +00:00
# Sanitize the username to meet Discourse's requirements
2024-06-19 15:13:08 +00:00
def sanitize_username ( original_username , email , real_name )
# original_username = username
2024-06-26 05:49:43 +00:00
sanitized_username = original_username . gsub ( / [^a-zA-Z0-9._-] / , '_' )
2024-06-19 15:13:08 +00:00
sanitized_username = " #{ sanitized_username } . " if sanitized_username . length < 2 # Allow two-character usernames
sanitized_username = sanitized_username [ 0 , 20 ] if sanitized_username . length > 20
firststep_sanitized = sanitized_username
existing_user = User . find_by ( username : sanitized_username )
2024-06-17 09:33:54 +00:00
if existing_user
2024-06-26 11:01:52 +00:00
if existing_user . email . downcase == email . downcase && existing_user . name == real_name
# The existing user with the username the same as the current proposed sanitised name _is_ the same person...
2024-06-19 15:13:08 +00:00
return sanitized_username
2024-06-17 09:33:54 +00:00
else
2024-06-26 11:01:52 +00:00
# We cannot clobber another person with the same proposed username, so we resolve the conflict
2024-06-17 09:33:54 +00:00
counter = 1
2024-06-19 15:13:08 +00:00
while User . exists? ( username : sanitized_username )
sanitized_username = " #{ firststep_sanitized } _ #{ counter } "
sanitized_username = sanitized_username [ 0 , 20 ] if sanitized_username . length > 20
2024-06-17 09:33:54 +00:00
counter += 1
end
end
end
2024-06-19 15:13:08 +00:00
if original_username != sanitized_username
# The Discourse username is not the same as the Gossamer Forums username
puts " Sanitized username: ' #{ original_username } ' --> ' #{ sanitized_username } ' "
insert_username_mapping ( original_username , sanitized_username , email , real_name )
2024-06-17 09:34:34 +00:00
# else
2024-06-19 15:13:08 +00:00
# puts "UNsanitized username: '#{original_username}' --> '#{sanitized_username}'"
2024-06-17 09:34:34 +00:00
end
2024-06-17 09:33:54 +00:00
2024-06-19 15:13:08 +00:00
sanitized_username
2024-06-17 09:33:54 +00:00
end
# Sanitize email to replace restricted domains
def sanitize_email ( email )
restricted_domains = [ 'mailinator.com' , 'example.com' ] # Add more restricted domains as needed
domain = email . split ( '@' ) . last
if restricted_domains . include? ( domain )
2024-06-17 09:34:34 +00:00
sanitized_email = email . gsub ( domain , 'example.org' ) # Change to a permissible domain
2024-06-17 09:33:54 +00:00
puts " Sanitized email: ' #{ email } ' --> ' #{ sanitized_email } ' "
return sanitized_email
end
email
2024-06-17 09:33:28 +00:00
end
2024-07-01 08:49:59 +00:00
def generate_unique_title ( base_title , user_id , timestamp )
unique_title = base_title
suffix = 1
2024-07-02 04:34:24 +00:00
puts " Generating unique title for base title: #{ base_title } "
2024-07-02 04:43:03 +00:00
while Topic . find_by ( title : unique_title )
# while Topic.where(title: unique_title).exists?
2024-07-02 04:34:24 +00:00
# while Topic.exists?(title: unique_title)
puts " Title ' #{ unique_title } ' already exists, generating a new one. "
2024-07-01 08:49:59 +00:00
unique_title = " #{ base_title } ( #{ suffix } ) "
2024-07-02 04:23:16 +00:00
puts " NEW unique_title #{ unique_title } "
2024-07-01 08:49:59 +00:00
suffix += 1
end
2024-07-02 04:34:24 +00:00
puts " Final unique title: #{ unique_title } "
2024-07-01 08:49:59 +00:00
unique_title
end
2024-06-22 05:13:41 +00:00
# Helper method to download an attachment / image from a URL
def download_attachment ( url )
2024-06-17 09:35:50 +00:00
begin
2024-06-26 11:35:00 +00:00
puts " URL: ' #{ url } ' "
2024-06-26 11:57:32 +00:00
URI . open ( url , ssl_verify_mode : OpenSSL :: SSL :: VERIFY_NONE ) . read
2024-06-17 09:35:50 +00:00
rescue OpenURI :: HTTPError = > e
2024-06-22 05:13:41 +00:00
puts " Failed to download attachment from #{ url } : #{ e . message } "
2024-06-17 09:35:50 +00:00
nil
2024-06-17 09:37:13 +00:00
rescue URI :: InvalidURIError = > e
puts " Failed to handle invalid URL/URI for #{ url } : #{ e . message } "
nil
2024-06-17 09:35:50 +00:00
end
end
2024-06-22 05:13:41 +00:00
# Helper method to upload an attachment / image to Discourse
2024-07-01 02:57:53 +00:00
def upload_attachment ( user_id , file , filename , gossamer_url )
2024-08-14 15:26:38 +00:00
begin
2024-07-01 11:07:58 +00:00
# BAD
# upload = Upload.create!(
# user_id: user_id,
# original_filename: filename,
# filesize: file.size,
# # filesize: File.size(file.path),
# # content_type: `file --brief --mime-type #{file.path}`.strip,
# # sha1: Digest::SHA1.file(file.path).hexdigest,
# # origin: 'user_avatar',
# # retain_hours: nil,
# url: gossamer_url
# )
# # Error -- non-existent method upload.ensure_consistency!
# Use the correct content type and SHA1 hash for the upload
2024-07-02 03:15:07 +00:00
# content_type = `file --brief --mime-type #{file.path}`.strip
2024-07-02 10:01:46 +00:00
# Generate SHA1 hash for the file
sha1 = Digest :: SHA1 . file ( file . path ) . hexdigest
2024-08-14 11:55:56 +00:00
2024-08-14 14:32:36 +00:00
# Ensure the file size is correctly handled
file_size = File . size ( file . path )
puts " Attempting to upload attachment. filename #{ filename } user_id #{ user_id } file.size #{ file . size } file_size #{ file_size } sha1 #{ sha1 } gossamer_url #{ gossamer_url } "
2024-07-01 11:07:58 +00:00
2024-08-14 11:55:56 +00:00
# Create the upload record, adjusting fields as needed for 3.2.5 compatibility
2024-08-14 12:13:31 +00:00
# upload = Upload.create!(
upload = Upload . new (
2024-07-01 02:57:53 +00:00
user_id : user_id ,
2024-06-20 09:23:30 +00:00
original_filename : filename ,
2024-08-14 12:13:31 +00:00
filesize : file . size ,
2024-07-02 10:01:46 +00:00
sha1 : sha1 ,
url : gossamer_url
2024-07-02 03:15:07 +00:00
# content_type: content_type,
# origin: 'composer',
# retain_hours: nil
2024-06-20 09:23:30 +00:00
)
2024-07-01 11:07:58 +00:00
2024-08-14 13:47:51 +00:00
# Save the upload object before generating the file path
upload . save!
2024-08-14 12:13:31 +00:00
# Use FileStore::LocalStore to store the file
store = FileStore :: LocalStore . new
upload . url = store . store_file ( file , store . get_path_for ( 'original' , upload . id , upload . sha1 , File . extname ( file . path ) ) )
2024-07-02 10:01:46 +00:00
# # Use Discourse's internal method to upload the file
# file.rewind
# Discourse.store.upload(file, upload.sha1)
2024-07-02 09:47:07 +00:00
# # Move the file to the correct location
# upload_path = Upload.get_path_for_file(upload.sha1)
# FileUtils.mkdir_p(File.dirname(upload_path))
2024-07-02 03:15:07 +00:00
# FileUtils.mv(file.path, upload.path)
2024-07-01 11:07:58 +00:00
2024-07-02 10:01:46 +00:00
# Save the upload object
2024-06-20 09:23:30 +00:00
upload . save!
2024-08-14 11:55:56 +00:00
2024-08-14 12:13:31 +00:00
# # Check if further processing or compatibility handling is needed
# if Discourse.respond_to?(:store)
# store = Discourse.store
# upload.url = store.store_file(file, store.get_path_for('original', upload.id, upload.sha1, File.extname(file.path)))
# else
# # Fallback for earlier file store methods
# upload_path = Upload.get_path_for_file(upload.sha1)
# FileUtils.mkdir_p(File.dirname(upload_path))
# FileUtils.mv(file.path, upload_path)
# end
2024-08-14 11:55:56 +00:00
# Return the upload object
2024-06-20 09:23:30 +00:00
upload
2024-08-14 15:26:38 +00:00
rescue = > e
puts " FAILURE: Failed to upload attachment #{ filename } for user_id #{ user_id } : #{ e . message } "
nil
end
2024-06-17 09:35:50 +00:00
end
2024-06-17 09:37:13 +00:00
2024-07-03 04:39:09 +00:00
# Helper method to upload an attachment / image to Discourse
def upload_attachment_two ( user_id , file , filename )
begin
upload = UploadCreator . new ( file , filename , origin : 'import' ) . create_for ( user_id )
2024-08-14 11:55:56 +00:00
# Raise an erorr if the upload fails to catch issues early
2024-07-03 04:39:09 +00:00
raise " Upload failed " unless upload
2024-08-14 11:55:56 +00:00
2024-07-03 04:39:09 +00:00
upload
rescue = > e
puts " Failed to upload attachment #{ filename } for user_id #{ user_id } : #{ e . message } "
nil
end
end
2024-06-22 05:13:41 +00:00
# Helper method to handle post attachments
def handle_post_attachments ( gossamer_post_id , post , user_id )
2024-06-28 00:55:58 +00:00
execute_query ( " SELECT * FROM gforum_PostAttachment WHERE post_id_fk = #{ gossamer_post_id } " ) . each do | att_row |
2024-06-22 05:13:41 +00:00
attachment_url = " https://forum.slowtwitch.com/forum/?do=post_attachment;postatt_id= #{ att_row [ 'postatt_id' ] } "
2024-07-01 06:20:17 +00:00
puts " Handling attachment: #{ attachment_url } "
2024-06-22 05:13:41 +00:00
attachment_data = download_attachment ( attachment_url )
next unless attachment_data
mime_type = att_row [ 'postatt_content' ]
2024-07-02 03:15:07 +00:00
2024-07-03 04:39:09 +00:00
# Create a Tempfile to pass to the UploadCreator
2024-06-22 05:13:41 +00:00
temp_file = Tempfile . new ( [ 'attachment' , File . extname ( att_row [ 'postatt_filename' ] ) ] )
temp_file . binmode
temp_file . write ( attachment_data )
temp_file . rewind
2024-07-01 06:20:17 +00:00
puts " Attempting upload... "
2024-07-03 04:46:50 +00:00
upload = upload_attachment_two ( user_id , temp_file , att_row [ 'postatt_filename' ] )
2024-06-22 05:13:41 +00:00
next unless upload
2024-07-01 11:07:58 +00:00
# Get the URL of the uploaded file from Discourse
2024-07-02 03:15:07 +00:00
# upload_url = Upload.get_from_url(upload.url).url
upload_url = upload . url
2024-07-01 06:20:17 +00:00
puts " Appending to post.raw... #{ upload_url } MIME type: #{ mime_type } "
2024-06-22 05:13:41 +00:00
if mime_type . start_with? ( 'image/' )
post . raw += " \n ![ #{ att_row [ 'postatt_filename' ] } ]( #{ upload_url } ) "
else
post . raw += " \n [ #{ att_row [ 'postatt_filename' ] } ]( #{ upload_url } ) "
end
post . save!
temp_file . close
temp_file . unlink
end
end
2024-06-17 09:37:13 +00:00
# def download_file(url)
# require 'open-uri'
# begin
# file = Tempfile.new
# file.binmode
# file.write(URI.open(url).read)
# file.rewind
# file
# rescue => e
# puts "Failed to download file from #{url}: #{e.message}"
# nil
# end
# end
# Helper method to upload an image to Discourse
# def upload_image(user, image_data, filename)
# return if image_data.nil?
#
# upload = Upload.create_for(user.id, File.open(image_data.path), filename, 'image/jpeg')
# if upload.nil? || !upload.persisted?
# puts "Failed to upload image for user #{user.username}"
# return
# end
#
# upload
# end
2024-06-17 09:35:50 +00:00
2024-07-10 04:12:38 +00:00
# Add 'Former_User' account if not already present
def add_former_user
puts " Adding 'Former User' account if it does not exist... "
former_user = User . find_by ( username : 'Former_User' )
if former_user . nil?
former_user = User . create! (
username : 'Former_User' ,
name : 'Former User' ,
email : 'former_user@example.com' ,
password : SecureRandom . hex ( 16 ) ,
active : true
)
puts " 'Former User' account created with user ID: #{ former_user . id } "
# Store the user ID mapping
puts " for insert_user_id_mapping: 0 (former_user) discourse_user.id #{ former_user . id } "
insert_user_id_mapping ( 0 , former_user . id )
else
puts " 'Former User' account already exists with user ID: #{ former_user . id } "
end
end
2024-06-17 09:35:50 +00:00
# Import users from Gossamer Forums to Discourse
2024-06-17 09:32:40 +00:00
def import_users
2024-08-16 15:06:25 +00:00
puts " Importing Users... "
2024-06-17 09:32:40 +00:00
users = [ ]
2024-06-17 09:35:13 +00:00
# Fetch all users from Gossamer Forums
2024-06-17 09:32:40 +00:00
execute_query ( " SELECT * FROM gforum_User " ) . each do | row |
users << {
id : row [ 'user_id' ] ,
2024-06-17 09:35:13 +00:00
username : sanitize_username ( row [ 'user_username' ] , row [ 'user_email' ] , row [ 'user_real_name' ] ) ,
2024-06-17 09:32:40 +00:00
email : row [ 'user_email' ] ,
created_at : Time . at ( row [ 'user_registered' ] ) ,
updated_at : Time . at ( row [ 'user_last_seen' ] ) ,
name : row [ 'user_real_name' ] ,
title : row [ 'user_title' ] ,
2024-06-17 09:34:34 +00:00
bio_raw : row [ 'user_about' ] || " " ,
2024-06-17 09:32:40 +00:00
website : row [ 'user_homepage' ] ,
2024-06-17 09:34:34 +00:00
location : row [ 'user_location' ] ,
custom_fields : {
2024-06-17 09:35:13 +00:00
md5_password : row [ 'user_password' ] ,
original_username : row [ 'user_username' ] ,
original_gossamer_id : row [ 'user_id' ]
2024-06-17 09:34:34 +00:00
}
2024-06-17 09:32:40 +00:00
}
end
2024-06-17 09:35:13 +00:00
# Create or update users in Discourse
2024-06-17 09:32:40 +00:00
create_users ( users ) do | user |
2024-06-17 09:36:54 +00:00
# insert_user_id_mapping(user[:id], user.id)
2024-06-17 09:32:40 +00:00
user
end
2024-08-16 15:06:25 +00:00
end
# Generate SQLite user ID mapping between Discourse and Gossamer
def generate_user_id_mapping
puts " Generating User ID Mapping... "
2024-08-16 16:16:15 +00:00
users = [ ]
# Fetch all users from Gossamer Forums
execute_query ( " SELECT * FROM gforum_User " ) . each do | row |
users << {
id : row [ 'user_id' ] ,
username : sanitize_username ( row [ 'user_username' ] , row [ 'user_email' ] , row [ 'user_real_name' ] ) ,
email : row [ 'user_email' ] ,
created_at : Time . at ( row [ 'user_registered' ] ) ,
updated_at : Time . at ( row [ 'user_last_seen' ] ) ,
name : row [ 'user_real_name' ] ,
title : row [ 'user_title' ] ,
bio_raw : row [ 'user_about' ] || " " ,
website : row [ 'user_homepage' ] ,
location : row [ 'user_location' ] ,
custom_fields : {
md5_password : row [ 'user_password' ] ,
original_username : row [ 'user_username' ] ,
original_gossamer_id : row [ 'user_id' ]
}
}
end
2024-06-17 09:32:40 +00:00
2024-08-16 15:06:25 +00:00
# For each user, add user ID mapping to SQLite now that we know what the Discourse user ID is
2024-06-26 09:42:15 +00:00
users . each do | user |
2024-06-19 15:13:08 +00:00
# discourse_username = sanitize_username(user[:username], user[:email], user[:name])
2024-06-26 09:42:15 +00:00
discourse_username = user [ :username ]
discourse_user = User . find_by ( username : discourse_username )
2024-06-17 09:35:13 +00:00
2024-06-26 09:42:15 +00:00
if discourse_user . nil?
2024-08-16 15:06:25 +00:00
puts " User #{ user [ :username ] } --> #{ discourse_username } not found in Discourse. Skipping user mapping addition. "
2024-06-26 09:42:15 +00:00
next
end
2024-06-17 09:33:54 +00:00
2024-08-16 15:06:25 +00:00
# Store the user ID mapping
2024-06-17 09:36:54 +00:00
# @user_id_map[user[:id]] = discourse_user.id
2024-06-17 09:37:13 +00:00
puts " for insert_user_id_mapping: user[:id] #{ user [ :id ] } discourse_user.id #{ discourse_user . id } "
2024-06-17 09:36:54 +00:00
insert_user_id_mapping ( user [ :id ] , discourse_user . id )
2024-08-16 15:06:25 +00:00
end
end
# Import and set user Bio and Images
def set_user_bio_images
puts " Setting User Bio and Images... "
2024-08-16 16:16:15 +00:00
users = [ ]
# Fetch all users from Gossamer Forums
execute_query ( " SELECT * FROM gforum_User " ) . each do | row |
users << {
id : row [ 'user_id' ] ,
username : sanitize_username ( row [ 'user_username' ] , row [ 'user_email' ] , row [ 'user_real_name' ] ) ,
email : row [ 'user_email' ] ,
created_at : Time . at ( row [ 'user_registered' ] ) ,
updated_at : Time . at ( row [ 'user_last_seen' ] ) ,
name : row [ 'user_real_name' ] ,
title : row [ 'user_title' ] ,
bio_raw : row [ 'user_about' ] || " " ,
website : row [ 'user_homepage' ] ,
location : row [ 'user_location' ] ,
2024-08-16 16:22:25 +00:00
custom_fields : {
2024-08-16 16:16:15 +00:00
md5_password : row [ 'user_password' ] ,
original_username : row [ 'user_username' ] ,
original_gossamer_id : row [ 'user_id' ]
}
}
end
2024-08-16 15:06:25 +00:00
# For each user, append user bio and import user files
users . each do | user |
# discourse_username = sanitize_username(user[:username], user[:email], user[:name])
discourse_username = user [ :username ]
discourse_user = User . find_by ( username : discourse_username )
if discourse_user . nil?
puts " User #{ user [ :username ] } --> #{ discourse_username } not found in Discourse. Skipping bio-image setting. "
next
end
2024-06-17 09:36:54 +00:00
# Ensure user profile exists and bio_raw is a string
discourse_user . user_profile || = UserProfile . new ( user_id : discourse_user . id )
2024-06-17 09:35:50 +00:00
discourse_user . user_profile . bio_raw || = " "
2024-06-17 09:36:54 +00:00
# Append bio if it exists, otherwise set it to empty string to avoid nil errors
2024-06-17 09:35:50 +00:00
if discourse_user . user_profile . bio_raw . empty?
2024-06-17 09:35:13 +00:00
discourse_user . user_profile . bio_raw = user [ :bio_raw ]
else
discourse_user . user_profile . bio_raw += " \n \n " + user [ :bio_raw ]
end
2024-06-17 09:35:50 +00:00
# Ensure the bio does not exceed 3000 characters
if discourse_user . user_profile . bio_raw . length > 3000
2024-06-17 09:35:13 +00:00
puts " Warning: About Me for user #{ discourse_user . username } (ID: #{ discourse_user . id } ) exceeds 3000 characters. Truncating. "
2024-06-17 09:35:50 +00:00
discourse_user . user_profile . bio_raw = discourse_user . user_profile . bio_raw [ 0 , 3000 ]
2024-06-17 09:35:13 +00:00
end
2024-06-17 09:34:34 +00:00
discourse_user . user_profile . save!
2024-06-17 09:32:40 +00:00
2024-06-17 09:35:50 +00:00
# Import user files
2024-08-15 15:16:32 +00:00
# TEMPORARY!! -- Use if script is interrupted mid-user-importing.
# if user[:id] > 353
import_user_files ( discourse_user )
# end
2024-06-17 09:32:40 +00:00
end
end
2024-06-17 09:35:50 +00:00
# # Import user files from Gossamer Forums to Discourse
# def import_user_files(user)
# print "\rImporting files for user #{user.username}..."
#
# original_gossamer_id = user.custom_fields['original_gossamer_id']
# if original_gossamer_id.nil? || original_gossamer_id.empty?
# puts "User #{user.username} does not have a valid original_gossamer_id. Skipping file import."
# return
# end
#
# # puts "Original Gossamer ID for user #{user.username}: #{original_gossamer_id}"
#
# # Fetch and import user files
# execute_query("SELECT * FROM gforum_User_Files WHERE ForeignColKey = #{original_gossamer_id}").each do |file|
# # Construct the file URL
# file_url = "https://forum.slowtwitch.com/images/users/images/#{file['ID'] % 10}/#{file['ID']}-#{file['File_Name']}"
# puts "User #{user.username} User ID: #{user.id} original_gossamer_id: #{original_gossamer_id} file_url: #{file_url}"
#
# new_bio = user.user_profile.bio_raw + "\n\n![#{file['File_Name']}](#{file_url})"
# if new_bio.length > 3000
# puts "Warning: About Me for user #{user.username} (ID: #{user.id}) exceeds 3000 characters after adding file link. Truncating."
# new_bio = new_bio[0, 3000]
# end
# user.user_profile.bio_raw = new_bio
# user.user_profile.save!
# end
# print "Importing files for user #{user.username}... Done.\n"
# end
2024-06-26 15:30:05 +00:00
# Helper method to convert TIFF to PNG
def convert_tiff_to_png ( file_path )
png_path = file_path . sub ( '.tiff' , '.png' ) . sub ( '.tif' , '.png' )
2024-06-27 15:44:01 +00:00
begin
system ( " convert #{ file_path } #{ png_path } " )
png_path if File . exist? ( png_path )
rescue = > e
puts " Failed to convert image #{ file_path } : #{ e . message } "
nil
end
2024-06-26 15:30:05 +00:00
end
# Helper method to resize an image to specified dimensions
def resize_image ( file_path , max_width , max_height )
resized_path = file_path . sub ( File . extname ( file_path ) , " _resized #{ File . extname ( file_path ) } " )
2024-06-27 06:22:12 +00:00
begin
system ( " convert #{ file_path } -resize #{ max_width } x #{ max_height } #{ resized_path } " )
resized_path if File . exist? ( resized_path )
rescue = > e
puts " Failed to resize image #{ file_path } : #{ e . message } "
nil
end
2024-06-27 13:16:06 +00:00
end
2024-06-26 15:30:05 +00:00
2024-06-27 14:46:16 +00:00
# Create avatar for the user using the provided image path
def create_avatar ( user , avatar_path )
tempfile = Tempfile . new ( [ 'avatar' , File . extname ( avatar_path ) ] )
FileUtils . copy_file ( avatar_path , tempfile . path )
filename = " avatar #{ File . extname ( avatar_path ) } "
upload = UploadCreator . new ( tempfile , filename , type : " avatar " ) . create_for ( user . id )
if upload . present? && upload . persisted?
user . create_user_avatar
user . user_avatar . update ( custom_upload_id : upload . id )
user . update ( uploaded_avatar_id : upload . id )
else
puts " Failed to upload avatar for user #{ user . username } : #{ avatar_path } "
puts upload . errors . inspect if upload
end
rescue StandardError = > e
puts " Failed to create avatar for user #{ user . username } : #{ avatar_path } . Error: #{ e . message } "
ensure
tempfile . close! if tempfile
end
2024-06-17 09:35:50 +00:00
# Import user files (profile images) from Gossamer Forums to Discourse
2024-06-17 09:32:40 +00:00
# For each gforum_User_Files row attached to this user the method:
#   1. rebuilds the legacy (double-percent-encoded) file URL and downloads it,
#   2. writes the bytes to a Tempfile and makes a 200x200 resized copy,
#   3. for the FIRST image only: uploads the original (used as profile header
#      and user card background) and sets the resized copy as the avatar,
#   4. appends image markdown for every image to the user's bio_raw,
#   5. de-duplicates repeated bio_raw paragraphs at the end.
def import_user_files ( user )
2024-06-17 09:33:28 +00:00
print " \r Importing files for user #{ user . username } ... "
2024-06-17 09:35:50 +00:00
2024-06-17 09:34:34 +00:00
# The Gossamer user id was stored on the Discourse user as a custom field
# during user import; without it the legacy files cannot be looked up.
original_gossamer_id = user . custom_fields [ 'original_gossamer_id' ]
if original_gossamer_id . nil? || original_gossamer_id . empty?
puts " User #{ user . username } does not have a valid original_gossamer_id. Skipping file import. "
return
end
2024-06-17 09:35:50 +00:00
puts " Original Gossamer ID for user #{ user . username } : #{ original_gossamer_id } "
# Counts images handled so far; only the first becomes avatar/header/card.
images_imported = 0
2024-06-17 09:34:34 +00:00
# NOTE(review): original_gossamer_id is interpolated unquoted into the SQL.
# It comes from our own custom field, but confirm it is always numeric.
execute_query ( " SELECT * FROM gforum_User_Files WHERE ForeignColKey = #{ original_gossamer_id } " ) . each do | file |
2024-06-26 15:30:05 +00:00
# Encode the filename twice to match how it is stored on the server
2024-06-26 13:52:24 +00:00
double_encoded_filename = file [ 'File_Name' ] . gsub ( '%' , '%25' )
encoded_filename = URI . encode_www_form_component ( double_encoded_filename )
2024-06-26 15:30:05 +00:00
# Construct the file URL (the legacy store shards files by ID modulo 10)
2024-06-26 12:33:49 +00:00
file_url = " https://forum.slowtwitch.com/images/users/images/ #{ file [ 'ID' ] % 10 } / #{ file [ 'ID' ] } - #{ encoded_filename } "
2024-06-17 09:35:13 +00:00
puts " User #{ user . username } User ID: #{ user . id } original_gossamer_id: #{ original_gossamer_id } file_url: #{ file_url } "
2024-06-17 09:35:50 +00:00
2024-06-26 15:30:05 +00:00
# Ensure the file is a user image and has a supported MIME type
2024-06-17 09:35:50 +00:00
next unless file [ 'ForeignColName' ] =~ / ^user_image \ d+$ /
2024-06-17 09:37:13 +00:00
puts " # A "
2024-08-14 15:26:38 +00:00
next unless [ 'image/jpeg' , 'image/png' , 'image/gif' , 'image/webp' , 'image/avif' , 'image/heif' , 'image/heic' ] . include? ( file [ 'File_MimeType' ] )
2024-06-17 09:37:13 +00:00
puts " # B "
2024-06-17 09:35:50 +00:00
2024-06-26 15:30:05 +00:00
# Download the attachment
2024-06-22 05:13:41 +00:00
image_data = download_attachment ( file_url )
2024-06-17 09:35:50 +00:00
next if image_data . nil?
2024-06-17 09:37:13 +00:00
puts " # C "
2024-06-17 09:35:50 +00:00
2024-06-26 15:30:05 +00:00
# Write the image data to a temporary file
2024-06-17 09:37:13 +00:00
temp_file = Tempfile . new ( [ 'user_image' , File . extname ( file [ 'File_Name' ] ) ] )
2024-06-17 09:35:50 +00:00
temp_file . binmode
temp_file . write ( image_data )
temp_file . rewind
2024-06-26 15:30:05 +00:00
# Convert TIFF to PNG if necessary (disabled: 'image/tiff' is not in the
# accepted MIME list above, so this branch could never trigger anyway)
2024-08-14 15:26:38 +00:00
# if file['File_MimeType'] == 'image/tiff'
# begin
# converted_tiff_to_png_path = convert_tiff_to_png(temp_file.path)
# raise "Conversion of TIFF failed" if converted_tiff_to_png_path.nil?
# temp_file.close
# temp_file.unlink
# temp_file = File.open(converted_tiff_to_png_path)
# rescue => e
# puts "Skipping image due to convert failure: #{temp_file.path}"
# temp_file.close
# temp_file.unlink
# next
# end
# end
2024-06-26 15:30:05 +00:00
# Resize the image for the avatar / profile picture (200x200)
2024-06-27 06:22:12 +00:00
begin
resized_image_path = resize_image ( temp_file . path , 200 , 200 )
raise " Image resize failed " if resized_image_path . nil?
resized_temp_file = Tempfile . new ( [ 'user_image_resized' , '.png' ] )
FileUtils . copy_file ( resized_image_path , resized_temp_file . path )
rescue = > e
2024-06-26 16:14:09 +00:00
puts " Skipping image due to resize failure: #{ temp_file . path } "
temp_file . close
temp_file . unlink
next
end
2024-06-26 15:30:05 +00:00
# Only handle the first image for avatar and profile settings
2024-06-17 09:35:50 +00:00
if images_imported == 0
2024-06-17 09:37:13 +00:00
puts " # D "
2024-06-26 15:30:05 +00:00
# Upload the original image for profile header and user card background
2024-08-14 14:32:36 +00:00
upload = upload_attachment ( user . id , temp_file , file [ 'File_Name' ] , file_url )
2024-06-17 09:35:50 +00:00
# NOTE(review): `next` here (and below) skips the temp-file cleanup at the
# bottom of this block, leaking the Tempfiles for this iteration.
next if upload . nil?
2024-06-26 15:30:05 +00:00
# Upload the resized image for the avatar
2024-08-14 14:32:36 +00:00
resized_upload = upload_attachment ( user . id , resized_temp_file , file [ 'File_Name' ] , file_url )
2024-06-26 15:30:05 +00:00
next if resized_upload . nil?
2024-06-26 16:35:15 +00:00
# Ensure the upload ID is valid and exists
2024-06-27 05:15:12 +00:00
if resized_upload
# && UserAvatar.exists?(custom_upload_id: resized_upload.id)
2024-06-26 16:35:15 +00:00
# Set the avatar using the resized image
2024-06-27 14:46:16 +00:00
# user.user_avatar = UserAvatar.create!(user_id: user.id, custom_upload_id: resized_upload.id)
# user.save!
# user.create_user_avatar
# user.user_avatar.update(custom_upload_id: resized_upload.id)
# user.update(uploaded_avatar_id: resized_upload.id)
create_avatar ( user , resized_temp_file . path )
2024-06-26 16:35:15 +00:00
puts " Avatar set for user #{ user . username } with upload ID #{ resized_upload . id } "
else
puts " Failed to set avatar for user #{ user . username } "
end
2024-06-17 09:37:13 +00:00
# Set the Profile Header
UserProfile . find_by ( user_id : user . id ) . update! ( profile_background_upload_id : upload . id )
# Set the User Card Background
UserProfile . find_by ( user_id : user . id ) . update! ( card_background_upload_id : upload . id )
2024-06-17 09:35:50 +00:00
images_imported += 1
2024-06-26 15:30:05 +00:00
end
2024-06-17 09:37:13 +00:00
puts " # E "
2024-06-26 15:30:05 +00:00
2024-06-27 05:32:05 +00:00
# Append the image to the user's bio_raw only if it does not already exist
image_markdown = " \n \n ![ #{ file [ 'File_Name' ] } ]( #{ file_url } ) "
user . user_profile . bio_raw || = " "
unless user . user_profile . bio_raw . include? ( image_markdown )
user . user_profile . bio_raw += image_markdown
user . user_profile . save!
end
2024-06-26 15:30:05 +00:00
# Clean up temporary files
2024-06-17 09:35:50 +00:00
temp_file . close
2024-06-27 13:54:21 +00:00
# Guard needed because the (disabled) TIFF branch could replace temp_file
# with a plain File, which has no #unlink
temp_file . unlink if temp_file . is_a? ( Tempfile )
2024-06-26 15:30:05 +00:00
resized_temp_file . close
resized_temp_file . unlink
2024-06-17 09:32:40 +00:00
end
2024-06-27 05:42:40 +00:00
# Check for and remove exact duplicate image markdown strings
user . user_profile . bio_raw = user . user_profile . bio_raw . split ( " \n \n " ) . uniq . join ( " \n \n " )
user . user_profile . save!
2024-06-17 09:34:34 +00:00
print " Importing files for user #{ user . username } ... Done. \n "
2024-06-17 09:32:40 +00:00
end
2024-06-17 09:35:50 +00:00
2024-06-17 09:35:13 +00:00
# Import categories (forums) from Gossamer Forums to Discourse.
# Each gforum_Forum row becomes a Discourse category unless one tagged with
# the same original_gossamer_id already exists; the old->new id mapping is
# recorded via insert_category_id_mapping for later topic/post imports.
def import_categories
  puts "Importing categories (forums)..."
  execute_query("SELECT * FROM gforum_Forum").each do |row|
    forum_id = row['forum_id']
    # Only create the category if it was not imported on a previous run
    next if CategoryCustomField.exists?(name: 'original_gossamer_id', value: forum_id)

    name = row['forum_name']
    description = row['forum_desc'] || "No description provided"
    timestamp = row['forum_last'] ? Time.at(row['forum_last']) : Time.now

    puts "id #{forum_id} name #{name} description #{description}"

    # Create the category in Discourse; forum_id doubles as the import_id
    category = create_category(
      {
        name: name,
        description: description,
        created_at: timestamp,
        updated_at: timestamp
      },
      forum_id
    )

    # Tag the category with its Gossamer origin so reruns stay idempotent
    category.custom_fields['original_gossamer_id'] = forum_id
    category.save!

    # Store the category ID mapping
    puts "for insert_category_id_mapping: category[:id] #{category[:id]} row['forum_id'] #{forum_id}"
    insert_category_id_mapping(forum_id, category[:id])
  end
  puts "Importing categories... Done."
end
2024-06-20 09:23:30 +00:00
# Helper function to ensure a topic title meets Discourse's minimum length.
#
# @param title [String, nil] the original Gossamer post subject (may be NULL
#   in the source database)
# @param min_length [Integer] minimum acceptable title length
# @return [String] the title, padded with ". " until it is long enough
def ensure_valid_title(title, min_length = 5)
  # Guard against nil post_subject rows: nil.length would raise NoMethodError
  title = title.to_s
  if title.length < min_length
    title += ". " * (min_length - title.length) # Append dots to make it longer
  end
  title
end
2024-06-28 06:32:52 +00:00
# # Convert Gossamer tags to Discourse markdown
# def convert_gossamer_tags_to_markdown(text)
# text.gsub!(/\[b\](.*?)\[\/b\]/, '**\1**')
# text.gsub!(/\[i\](.*?)\[\/i\]/, '*\1*')
# text.gsub!(/\[img\](.*?)\[\/img\]/, '![image](\1)')
# text.gsub!(/\[quote\](.*?)\[\/quote\]/m, '[quote]\1[/quote]')
# text.gsub!(/\[quote (.*?)\](.*?)\[\/quote\]/m, '[quote=\1]\2[/quote]')
# text.gsub!(/\[font "(.*?)"\](.*?)\[\/font\]/m, '\2') # Ignoring font changes
# text.gsub!(/\[size (\d+)\](.*?)\[\/size\]/m, '\2') # Ignoring size changes
# text
# end
# Sanitize post message to remove Gossamer-specific tags and convert to Discourse-friendly format
#
# @param message [String, nil] raw Gossamer post body (may contain NULs,
#   BBCode-style tags, and a trailing [signature] marker)
# @return [String] Discourse-ready markdown, never empty
def sanitize_post_message(message)
  # Ensure the raw post string contents itself is acceptable to Discourse
  sanitized_message = message&.tr("\0", '') || ""

  # Ensure minimum length
  if sanitized_message.strip.empty? || sanitized_message.length < 3
    sanitized_message = "Empty post contents."
  end
  # Ensure sentence structure
  unless sanitized_message.match?(/[.!?]\s|[.!?]$/)
    sanitized_message += "."
  end

  # Remove the [signature] marker as we don't support this in Discourse.
  # BUGFIX: this previously used non-destructive gsub and discarded the
  # result, so signatures were never actually stripped.
  sanitized_message.gsub!(/\n?\[signature\]\n?/, '')

  # Convert Gossamer tags to Discourse markdown
  sanitized_message.gsub!(/\[b\](.*?)\[\/b\]/, '**\1**')
  sanitized_message.gsub!(/\[i\](.*?)\[\/i\]/, '*\1*')
  sanitized_message.gsub!(/\[u\](.*?)\[\/u\]/, '<u>\1</u>')
  sanitized_message.gsub!(/\[quote\](.*?)\[\/quote\]/m, '[quote]\1[/quote]')
  sanitized_message.gsub!(/\[quote\s+user=(.*?)\](.*?)\[\/quote\]/m, '[quote \1]\2[/quote]')
  sanitized_message.gsub!(/\[img\](.*?)\[\/img\]/, '![image](\1)')
  sanitized_message.gsub!(/\[url=(.*?)\](.*?)\[\/url\]/, '[\2](\1)')
  sanitized_message.gsub!(/\[size=(.*?)\](.*?)\[\/size\]/, '\2')
  sanitized_message.gsub!(/\[color=(.*?)\](.*?)\[\/color\]/, '\2')
  sanitized_message.gsub!(/\[font=(.*?)\](.*?)\[\/font\]/, '\2')
  # Remove unsupported tags.
  # NOTE(review): this blanket \[.*?\] pass also strips the bracketed parts
  # produced by the quote/link conversions above; the image repair pass below
  # restores images only. Confirm quotes/links are intentionally sacrificed.
  sanitized_message.gsub!(/\[.*?\]/, '')

  # Convert inline image syntax from `!(url)` to `![url](url)` — this repairs
  # image markdown whose `[image]` label was removed by the pass above
  sanitized_message.gsub!(/!\((http[s]?:\/\/[^\)]+)\)/, '![\1](\1)')

  sanitized_message
end
# Fetch the view counter for a Gossamer post from the gforum_PostView table.
#
# @param post_id [Integer] the legacy Gossamer post id
# @return [Integer] the stored view count, or 0 when no row exists
def fetch_post_views(post_id)
  row = execute_query("SELECT post_views FROM gforum_PostView WHERE post_id_fk = #{post_id} LIMIT 1").first
  return 0 unless row
  row['post_views']
end
2024-08-15 17:49:52 +00:00
#THREADING OUTLINE HERE --------------------------------------------
2024-08-16 11:30:56 +00:00
# Method to dynamically calculate the optimal thread pool size based on system load
#
# Reads the 1-minute load average from /proc/loadavg (Linux-specific) and the
# logical CPU count from concurrent-ruby, then sizes the pool so that a busy
# machine gets fewer threads and an idle one more, capped at twice the CPU count.
#
# @return [Integer] pool size in the range 1..(2 * cpu_count)
def calculate_dynamic_pool_size
  # /proc/loadavg looks like "0.42 0.36 0.30 1/234 5678"
  load_avg = File.read('/proc/loadavg').split
  # Extract the 1-minute load average from the fetched data
  one_minute_load_avg = load_avg[0].to_f
  # Determine how many logical CPU cores are available on the system
  cpu_count = Concurrent.processor_count
  # Log the current load and CPU information for debugging and monitoring purposes
  puts "1-minute Load Average: #{one_minute_load_avg}, CPU Count: #{cpu_count}"
  # Calculate the initial pool size from the CPU/load ratio.
  # BUGFIX: clamp the divisor to a small positive floor first — on an idle
  # machine loadavg can be "0.00", and (cpu_count / 0.0).ceil raises
  # FloatDomainError because the quotient is Infinity.
  initial_pool_size = (cpu_count / [one_minute_load_avg, 0.1].max).ceil
  # Ensure the pool size is at least 1 to avoid creating a pool with zero threads
  initial_pool_size = 1 if initial_pool_size < 1
  # Cap the maximum pool size to twice the number of CPUs
  # This prevents overloading the system with too many threads, which could lead to diminishing returns
  max_pool_size = cpu_count * 2
  # Adjust the final pool size to be within the valid range (1 to max_pool_size)
  pool_size = [[initial_pool_size, max_pool_size].min, 1].max
  puts "Calculated and adjusted dynamic pool size: #{pool_size}" # Log the dynamically adjusted pool size
  pool_size
end
2024-08-15 17:49:52 +00:00
# Import all topics (OP posts, post_root_id = 0) concurrently, in batches,
# using a thread pool whose size is recalculated per batch from system load.
# Each worker imports one full topic (OP + replies) via topic_import_job and
# records success/failure in the SQLite status table so reruns can skip work.
def threaded_topic_import
  # The query selects post_ids from gforum_Post where post_root_id is 0,
  # meaning these posts are the topic starters (OPs).
  result = execute_query("SELECT post_id FROM gforum_Post WHERE post_root_id = 0 ORDER BY post_id ASC")
  # Convert the result set to an array of post_ids
  parent_post_ids = result.map { |row| row['post_id'] }
  parent_post_count = parent_post_ids.count

  batch_size = 100        # Number of topics submitted to the pool per batch
  current_post_batch = 0  # Index of the next OP post to process
  is_complete = false     # Set once every OP post has been submitted

  # Run until all posts have been processed.
  until is_complete
    # Query in batches, create pool, wait for termination, do it again
    current_post_batch_max = current_post_batch + batch_size

    # Dynamically calculate the pool size based on system load to optimise performance
    pool_size = calculate_dynamic_pool_size
    pool = Concurrent::FixedThreadPool.new(pool_size) # Create a thread pool with the calculated size

    # Process each post in the current batch
    while current_post_batch < current_post_batch_max
      post_id = parent_post_ids[current_post_batch] # Fetch the post_id for the current post

      # Check if the post has already been processed or is incomplete.
      # (Renamed the local from `post_status` so it no longer shadows the
      # post_status helper method.)
      status = post_status(post_id)
      if status.nil? || status == 0
        puts "Starting import for post_id #{post_id} in batch #{current_post_batch / batch_size + 1} with #{pool_size} threads"
        # BUGFIX: pass post_id into the worker as a block argument instead of
        # closing over the loop-local variable. `post_id` is reassigned on the
        # next loop iteration, so a thread that started late would otherwise
        # import the wrong (or a duplicate) post.
        pool.post(post_id) do |current_id|
          begin
            puts "Processing post ID: #{current_id}"
            topic_import_job(current_id)    # Import topic and its replies
            mark_post_as_complete(current_id) # Mark as complete in SQLite table
          rescue => e
            puts "Error processing post ID #{current_id}: #{e.message}"
            mark_post_as_failed(current_id)
          end
        end
      else
        puts "Skipping post_id #{post_id}, already processed."
      end

      current_post_batch += 1 # Increment, moving to next post in the batch
      break if current_post_batch >= parent_post_count
    end

    pool.shutdown             # Initiate thread pool shutdown after all jobs submitted
    pool.wait_for_termination # Wait for all threads to finish exec

    # Set when all posts have been processed
    is_complete = true if current_post_batch >= parent_post_count
  end
end
2024-08-16 15:06:25 +00:00
# # Method to ensure thread-safe updates to highest_processed_post_id
# def update_highest_processed_post_id_thread_safe(post_id)
# @highest_processed_mutex ||= Mutex.new
# @highest_processed_mutex.synchronize do
# if post_id > fetch_highest_processed_post_id
# update_highest_processed_post_id(post_id)
# end
# end
# end
2024-08-16 12:20:52 +00:00
2024-08-16 11:30:56 +00:00
# Method to import an entire topic, including its first post and all subsequent replies.
#
# Fetches the OP row for post_id, creates the Discourse topic (retrying with
# a "(n)" suffix on duplicate titles) and its first post, records URL/ID
# mappings and per-user/per-topic counters in SQLite, then imports every
# reply (post_root_id = post_id) sequentially.
#
# @param post_id [Integer] the legacy Gossamer post id of the topic starter
def topic_import_job(post_id)
  # Fetch the post data for the given post_id (this is the first post in the topic)
  row = execute_query("SELECT post_id, user_id_fk, forum_id_fk, post_root_id, post_subject, post_time, post_message, post_father_id, post_replies FROM gforum_Post WHERE post_id = #{post_id}").first
  # Early return if the post data is not found
  return unless row
  post_id = row['post_id'].to_i
  puts "Processing post_id #{row['post_id']} post_root_id #{row['post_root_id']} post_subject/title #{row['post_subject']} forum_id_fk/category_id #{row['forum_id_fk']}"
  # Fetch the mapped Discourse user and category ID based on Gossamer data
  discourse_user_id = fetch_user_id_mapping(row['user_id_fk'])
  discourse_category_id = fetch_category_id_mapping(row['forum_id_fk'])
  puts "discourse_user_id #{discourse_user_id} discourse_category_id #{discourse_category_id}"
  return unless discourse_user_id && discourse_category_id
  # Ensure the topic title is valid; unique_title gets a suffix on collisions
  title = ensure_valid_title(row['post_subject'])
  unique_title = title
  # Fetch the number of views the post has had
  post_views = fetch_post_views(row['post_id'])
  # Skip topics that were already imported on a previous run
  unless TopicCustomField.exists?(name: 'original_gossamer_id', value: row['post_id'])
    begin
      suffix = 1
      topic_created = false
      # Retry topic creation with "title (n)" until the title is unique
      until topic_created
        begin
          puts "CREATE TOPIC unique_title #{unique_title} title #{title} discourse_user_id #{discourse_user_id} category_id #{discourse_category_id}"
          topic = Topic.create!(
            title: unique_title,
            user_id: discourse_user_id,
            created_at: Time.at(row['post_time']),
            updated_at: Time.at(row['post_time']),
            category_id: discourse_category_id,
            views: post_views || 0,
            posts_count: 0
          )
          topic.custom_fields['original_gossamer_id'] = row['post_id']
          topic.save!
          topic_created = true
        rescue ActiveRecord::RecordInvalid => e
          if e.message.include?("Title has already been used")
            unique_title = "#{title} (#{suffix})"
            suffix += 1
          else
            raise e
          end
        end
      end
      # Update the database with the last post time and user for the topic
      update_db_topic_last_post_time(topic.id, Time.at(row['post_time']).to_i)
      update_db_topic_last_post_user(topic.id, discourse_user_id)
      # Increment the topic count for the user
      update_db_user_topic_count(discourse_user_id, fetch_db_user_topic_count(discourse_user_id).to_i + 1)
      # Sanitize and prepare the post message for Discourse
      sanitized_post_message = sanitize_post_message(row['post_message'])
      puts "CREATE POST topic.id #{topic.id} discourse_user_id #{discourse_user_id}"
      # Increment the post number for the topic
      post_number = fetch_db_topic_post_numbers(topic.id).to_i + 1
      update_db_topic_post_numbers(topic.id, post_number)
      # Create the initial post in the new topic
      post = Post.create!(
        topic_id: topic.id,
        user_id: discourse_user_id,
        raw: sanitized_post_message,
        created_at: Time.at(row['post_time']),
        updated_at: Time.at(row['post_time']),
        reads: post_views || 0,
        post_number: post_number
      )
      post.custom_fields['original_gossamer_id'] = row['post_id']
      post.save!
      # Increment the post count for the topic and user
      update_db_topic_post_count(topic.id, fetch_db_topic_post_count(topic.id).to_i + 1)
      update_db_user_post_count(discourse_user_id, fetch_db_user_post_count(discourse_user_id).to_i + 1)
      # Handle any attachments associated with the post
      handle_post_attachments(row['post_id'], post, discourse_user_id)
      # Create URL mappings for the new topic
      new_url = "https://new/t/#{topic.slug}/#{topic.id}"
      insert_url_mapping(row['post_id'], new_url, unique_title)
      # Now fetch and import all replies to this topic.
      # BUGFIX: the query previously read "... post_time, FROM gforum_Post" —
      # the stray comma before FROM is invalid SQL and made every reply
      # import for the topic fail.
      replies = execute_query("SELECT post_id, user_id_fk, post_message, post_time FROM gforum_Post WHERE post_root_id = #{post_id} ORDER BY post_time ASC")
      # Import each reply sequentially (keeps post_number ordering consistent)
      replies.each do |reply_row|
        begin
          # Fetch the discourse user ID for the reply
          reply_user_id = fetch_user_id_mapping(reply_row['user_id_fk'])
          # Sanitize and prepare the reply message for Discourse
          sanitized_reply_message = sanitize_post_message(reply_row['post_message'])
          puts "CREATE REPLY in topic_id #{topic.id}"
          # Increment the post number for the topic
          post_number = fetch_db_topic_post_numbers(topic.id).to_i + 1
          update_db_topic_post_numbers(topic.id, post_number)
          # Create the reply post in the existing topic
          post = Post.create!(
            topic_id: topic.id,
            user_id: reply_user_id,
            raw: sanitized_reply_message,
            created_at: Time.at(reply_row['post_time']),
            updated_at: Time.at(reply_row['post_time']),
            post_number: post_number
          )
          post.custom_fields['original_gossamer_id'] = reply_row['post_id']
          post.save!
          # Increment the post count for the topic and user
          update_db_topic_post_count(topic.id, fetch_db_topic_post_count(topic.id).to_i + 1)
          update_db_user_post_count(reply_user_id, fetch_db_user_post_count(reply_user_id).to_i + 1)
          # Update last post time and user for the topic
          if fetch_db_topic_last_post_time(topic.id).nil? || Time.at(reply_row['post_time']).to_i > fetch_db_topic_last_post_time(topic.id).to_i
            update_db_topic_last_post_time(topic.id, Time.at(reply_row['post_time']).to_i)
            update_db_topic_last_post_user(topic.id, reply_user_id)
          end
          # Handle any attachments associated with the reply
          handle_post_attachments(reply_row['post_id'], post, reply_user_id)
        rescue ActiveRecord::RecordInvalid => e
          puts "Error importing reply with post_id #{reply_row['post_id']}: #{e.message}"
        end
      end
    rescue ActiveRecord::RecordInvalid => e
      puts "Error importing topic with post_id #{row['post_id']}: #{e.message}"
    end
  end
end
2024-08-16 11:30:56 +00:00
2024-08-15 17:49:52 +00:00
#-------------------------------------------------------------------
2024-06-26 05:26:26 +00:00
# Import topics and posts from Gossamer Forums to Discourse
def import_topics_and_posts_with_attachments
puts " Importing topics and posts with attachments... "
2024-06-28 06:32:52 +00:00
2024-06-28 12:25:26 +00:00
# topic_last_post_time = {}
# topic_post_count = Hash.new(0)
# user_topic_count = Hash.new(0)
# user_post_count = Hash.new(0)
# topic_last_post_user = {}
# topic_post_numbers = Hash.new { |hash, key| hash[key] = 0 }
2024-06-28 06:32:52 +00:00
2024-07-01 06:08:07 +00:00
# Fetch the highest old_post_id from the url_map table
highest_old_post_id = fetch_highest_old_post_id . to_i
puts " Highest (OP) old_post_id in url_map: #{ highest_old_post_id } "
highest_processed_post_id = fetch_highest_processed_post_id . to_i
puts " Highest processed post_id: #{ highest_processed_post_id } "
2024-07-02 04:13:42 +00:00
# OVERRIDE........
2024-07-04 13:08:52 +00:00
# Attachment example: highest_processed_post_id = 1359862
2024-07-02 04:13:42 +00:00
2024-06-26 05:26:26 +00:00
# Execute the query to get all posts ordered by post_id
2024-08-16 11:30:56 +00:00
execute_query ( " SELECT post_id, user_id_fk, forum_id_fk, post_root_id, post_subject, post_time, post_message, post_father_id, post_likes, post_replies FROM gforum_Post ORDER BY post_id " ) . each do | row |
2024-07-01 06:08:07 +00:00
post_id = row [ 'post_id' ] . to_i
2024-07-10 04:12:38 +00:00
# Skip posts that have already been processed
2024-07-01 06:30:44 +00:00
# FIXED PROPERLY FOR FUTURE RUNS
# next if post_id < highest_old_post_id
next if post_id < = highest_processed_post_id
2024-07-01 06:08:07 +00:00
puts " Processing post_id #{ row [ 'post_id' ] } post_id #{ row [ 'post_root_id' ] } post_subject/title #{ row [ 'post_subject' ] } forum_id_fk/category_id #{ row [ 'forum_id_fk' ] } "
2024-06-28 06:32:52 +00:00
# discourse_user_id = @user_id_map[row['user_id_fk']]
# Fetch the Discourse user and category IP mappings
2024-06-26 05:26:26 +00:00
discourse_user_id = fetch_user_id_mapping ( row [ 'user_id_fk' ] )
discourse_category_id = fetch_category_id_mapping ( row [ 'forum_id_fk' ] )
puts " discourse_user_id #{ discourse_user_id } discourse_category_id #{ discourse_category_id } "
next unless discourse_user_id && discourse_category_id
2024-06-28 06:32:52 +00:00
# Check if the post is a topic (post_root_id == 0)
2024-06-26 05:26:26 +00:00
if row [ 'post_root_id' ] == 0
puts " # 1 "
# Ensure the title is valid
title = ensure_valid_title ( row [ 'post_subject' ] )
2024-07-02 08:49:59 +00:00
unique_title = title
2024-06-28 06:32:52 +00:00
# Confirm the number of views the post has had
post_views = fetch_post_views ( row [ 'post_id' ] )
2024-06-26 05:26:26 +00:00
# Skip if the topic already exists
unless TopicCustomField . exists? ( name : 'original_gossamer_id' , value : row [ 'post_id' ] )
# Create the topic
begin
2024-07-02 08:49:59 +00:00
suffix = 1
topic_created = false
while ! topic_created
begin
puts " # 2 "
2024-07-02 09:10:54 +00:00
# unique_title = generate_unique_title(title, discourse_user_id, Time.at(row['post_time']))
2024-07-02 08:49:59 +00:00
puts " CREATE TOPIC unique_title #{ unique_title } title #{ title } discourse_user_id #{ discourse_user_id } category_id #{ discourse_category_id } "
topic = Topic . create! (
title : unique_title ,
user_id : discourse_user_id ,
created_at : Time . at ( row [ 'post_time' ] ) ,
updated_at : Time . at ( row [ 'post_time' ] ) ,
category_id : discourse_category_id ,
views : post_views || 0 ,
posts_count : 0
)
topic . custom_fields [ 'original_gossamer_id' ] = row [ 'post_id' ]
topic . save!
topic_created = true
rescue ActiveRecord :: RecordInvalid = > e
if e . message . include? ( " Title has already been used " )
2024-07-02 09:00:04 +00:00
unique_title = " #{ title } ( #{ suffix } ) "
2024-07-02 08:49:59 +00:00
suffix += 1
else
raise e
end
end
end
2024-06-28 06:32:52 +00:00
# Track last post time and user for the topic
2024-06-28 12:25:26 +00:00
# topic_last_post_time[topic.id] = Time.at(row['post_time'])
# topic_last_post_user[topic.id] = discourse_user_id
2024-06-28 16:17:30 +00:00
update_db_topic_last_post_time ( topic . id , Time . at ( row [ 'post_time' ] ) . to_i )
update_db_topic_last_post_user ( topic . id , discourse_user_id )
2024-06-28 06:32:52 +00:00
# Increment the count of the number of topics created by each user
2024-06-28 12:25:26 +00:00
# user_topic_count[discourse_user_id] += 1
2024-06-28 16:17:30 +00:00
update_db_user_topic_count ( discourse_user_id , fetch_db_user_topic_count ( discourse_user_id ) . to_i + 1 )
2024-06-26 05:26:26 +00:00
2024-06-28 06:32:52 +00:00
# # Ensure the raw post stirng contents itself is acceptable to Discourse
# sanitized_post_message = row['post_message']&.tr("\0", '') || ""
#
# # Convert Gossamer tags to Discourse markdown
# sanitized_post_message = convert_gossamer_tags_to_markdown(sanitized_post_message)
#
# # Remove the [signature] label from appearing at the end of the messages after import
# sanitized_post_message.sub(/\n?\[signature\]\n?\z/, '')
# sanitized_post_message.gsub(/\n?\[signature\]\n?/, '')
# Sanitize the post message
sanitized_post_message = sanitize_post_message ( row [ 'post_message' ] )
2024-06-26 05:26:26 +00:00
puts " CREATE POST topic.id #{ topic . id } discourse_user_id #{ discourse_user_id } "
2024-06-28 06:32:52 +00:00
# Increment the number of posts in the given topic.
2024-06-28 12:25:26 +00:00
# topic_post_numbers[topic.id] += 1
2024-06-28 16:17:30 +00:00
post_number = fetch_db_topic_post_numbers ( topic . id ) . to_i + 1
update_db_topic_post_numbers ( topic . id , post_number )
2024-06-28 06:32:52 +00:00
# Create the initial post in the topic
2024-06-26 05:26:26 +00:00
post = Post . create! (
topic_id : topic . id ,
user_id : discourse_user_id ,
# raw: import_attachments(row['post_message'], row['post_id']),
# raw: row['post_message'] || "",
raw : sanitized_post_message ,
created_at : Time . at ( row [ 'post_time' ] ) ,
2024-06-28 06:32:52 +00:00
updated_at : Time . at ( row [ 'post_time' ] ) ,
like_count : row [ 'post_likes' ] || 0 ,
reads : post_views || 0 ,
2024-06-28 12:25:26 +00:00
post_number : post_number
2024-06-26 05:26:26 +00:00
)
2024-06-28 12:25:26 +00:00
# post_number: topic_post_numbers[topic.id]
2024-06-26 05:26:26 +00:00
post . custom_fields [ 'original_gossamer_id' ] = row [ 'post_id' ]
post . save!
2024-06-28 06:32:52 +00:00
# Track the number of posts in the topic and by the user
2024-06-28 12:25:26 +00:00
# topic_post_count[topic.id] += 1
# user_post_count[discourse_user_id] += 1
2024-06-28 16:17:30 +00:00
update_db_topic_post_count ( topic . id , fetch_db_topic_post_count ( topic . id ) . to_i + 1 )
update_db_user_post_count ( discourse_user_id , fetch_db_user_post_count ( discourse_user_id ) . to_i + 1 )
2024-06-28 06:32:52 +00:00
2024-06-26 05:26:26 +00:00
# Handle attachments for the post
handle_post_attachments ( row [ 'post_id' ] , post , discourse_user_id )
# Create URL mappings
# old_url = "https://old/forum/#{row['forum_name']}/topics/#{row['post_id']}"
new_url = " https://new/t/ #{ topic . slug } / #{ topic . id } "
2024-07-01 08:49:59 +00:00
insert_url_mapping ( row [ 'post_id' ] , new_url , unique_title )
2024-07-01 06:08:07 +00:00
# Update the highest processed post_id
puts " Updated highest processed post_id #{ post_id } "
update_highest_processed_post_id ( post_id )
2024-06-26 05:26:26 +00:00
rescue ActiveRecord :: RecordInvalid = > e
puts " Error importing topic with post_id #{ row [ 'post_id' ] } : #{ e . message } "
end
end
else
puts " # 3 "
2024-06-28 06:32:52 +00:00
# Confirm the number of views the post has had
post_views = fetch_post_views ( row [ 'post_id' ] )
2024-06-26 05:26:26 +00:00
# Find the root topic for the post
root_topic_field = TopicCustomField . find_by ( name : 'original_gossamer_id' , value : row [ 'post_root_id' ] )
2024-06-17 09:36:35 +00:00
2024-06-26 05:26:26 +00:00
if root_topic_field
topic_id = root_topic_field . topic_id
# Find the parent post for the reply
parent_post_field = PostCustomField . find_by ( name : 'original_gossamer_id' , value : row [ 'post_father_id' ] )
reply_to_post_number = parent_post_field ? Post . find ( parent_post_field . post_id ) . post_number : nil
# Create the post in the existing topic
begin
puts " # 4 "
2024-06-28 06:32:52 +00:00
# Sanitize the post message
sanitized_post_message = sanitize_post_message ( row [ 'post_message' ] )
2024-06-28 12:25:26 +00:00
# topic_post_numbers[topic_id] += 1
2024-06-28 16:17:30 +00:00
post_number = fetch_db_topic_post_numbers ( topic_id ) . to_i + 1
update_db_topic_post_numbers ( topic_id , post_number )
2024-06-27 15:44:01 +00:00
2024-06-28 12:25:26 +00:00
# Create the post in the existing topic
2024-06-26 05:26:26 +00:00
post = Post . create! (
topic_id : topic_id ,
user_id : discourse_user_id ,
# raw: import_attachments(row['post_message'], row['post_id']),
# raw: row['post_message'] || "",
raw : sanitized_post_message ,
created_at : Time . at ( row [ 'post_time' ] ) ,
2024-06-28 06:32:52 +00:00
updated_at : Time . at ( row [ 'post_time' ] ) ,
reply_to_post_number : reply_to_post_number ,
like_count : row [ 'post_replies' ] || 0 ,
2024-06-28 16:17:30 +00:00
reads : post_views || fetch_db_topic_post_count ( topic_id ) . to_i ,
2024-06-28 12:25:26 +00:00
post_number : post_number
2024-06-26 05:26:26 +00:00
)
post . custom_fields [ 'original_gossamer_id' ] = row [ 'post_id' ]
post . save!
2024-06-28 06:32:52 +00:00
# Track the number of posts in the topic and by the user
2024-06-28 12:25:26 +00:00
# topic_post_count[topic_id] += 1
# user_post_count[discourse_user_id] += 1
2024-06-28 16:17:30 +00:00
update_db_topic_post_count ( topic_id , fetch_db_topic_post_count ( topic_id ) . to_i + 1 )
update_db_user_post_count ( discourse_user_id , fetch_db_user_post_count ( discourse_user_id ) . to_i + 1 )
2024-06-26 05:26:26 +00:00
2024-06-28 06:32:52 +00:00
# Update last post time and user for the topic
2024-06-28 12:25:26 +00:00
# if topic_last_post_time[topic_id].nil? || Time.at(row['post_time']) > topic_last_post_time[topic_id]
# topic_last_post_time[topic_id] = Time.at(row['post_time'])
# topic_last_post_user[topic_id] = discourse_user_id
# end
2024-06-28 16:17:30 +00:00
if fetch_db_topic_last_post_time ( topic_id ) . nil? || Time . at ( row [ 'post_time' ] ) . to_i > fetch_db_topic_last_post_time ( topic_id ) . to_i
update_db_topic_last_post_time ( topic_id , Time . at ( row [ 'post_time' ] ) . to_i )
update_db_topic_last_post_user ( topic_id , discourse_user_id )
2024-06-28 06:32:52 +00:00
end
2024-06-26 05:26:26 +00:00
# Handle attachments for the post
handle_post_attachments ( row [ 'post_id' ] , post , discourse_user_id )
2024-07-01 06:08:07 +00:00
# Update the highest processed post_id
puts " Updated highest processed post_id #{ post_id } "
update_highest_processed_post_id ( post_id )
2024-06-28 06:32:52 +00:00
2024-06-26 05:26:26 +00:00
rescue ActiveRecord :: RecordInvalid = > e
puts " Error importing post with post_id #{ row [ 'post_id' ] } : #{ e . message } "
end
else
puts " Warning: Root topic not found for post_id #{ row [ 'post_id' ] } with post_root_id #{ row [ 'post_root_id' ] } "
2024-06-17 09:36:35 +00:00
end
2024-06-17 09:35:13 +00:00
end
2024-06-17 09:32:40 +00:00
end
2024-07-01 06:28:27 +00:00
puts " Importing topics and posts with attachments... Done. "
end
2024-06-28 06:32:52 +00:00
2024-07-04 13:46:40 +00:00
# Update every imported topic with the correct last-post time, post count,
# and last-post user, using the per-topic counters accumulated in the local
# SQLite tracking database during the import pass.
def update_topic_stats
  puts "Update topics with the correct last post time, post count, and last post user"

  @db.execute("SELECT * FROM topic_last_post_time").each do |row|
    topic_id, last_post_time = row

    # Fix: Topic.find raises ActiveRecord::RecordNotFound and would abort the
    # whole stats pass if a topic was deleted after import; skip it instead.
    topic = Topic.find_by(id: topic_id)
    if topic.nil?
      puts "Warning: topic #{topic_id} not found, skipping stats update"
      next
    end

    # update_columns bypasses validations/callbacks, which previously rejected
    # these imported values (see the earlier "Validation error" note).
    topic.update_columns(
      updated_at: Time.at(last_post_time),
      posts_count: fetch_db_topic_post_count(topic_id).to_i,
      last_posted_at: Time.at(last_post_time),
      bumped_at: Time.at(last_post_time),
      last_post_user_id: fetch_db_topic_last_post_user(topic_id).to_i
    )
  end
end
2024-06-28 06:32:52 +00:00
2024-07-04 13:46:40 +00:00
# Map an imported post count onto a Discourse trust level.
# 0-2 posts => TL1 (basic user), 3-50 => TL2 (member), 51+ => TL3 (regular).
def trust_level_for_post_count(count)
  case count
  when 0..2 then 1
  when 3..50 then 2
  else 3
  end
end

# Update user stats (topic and post counts) from the counters accumulated in
# the local SQLite tracking database, and promote users' trust levels based on
# their imported post count (never demoting an existing higher level).
def update_user_stats
  puts "Update user profiles with the number of topics and posts created"

  @db.execute("SELECT * FROM user_topic_count").each do |row|
    user_id, count = row
    puts "update_user_stats user_id #{user_id} topic_count #{count}"
    user_stat = UserStat.find_or_initialize_by(user_id: user_id)
    user_stat.update_columns(topic_count: count)
  end

  @db.execute("SELECT * FROM user_post_count").each do |row|
    user_id, count = row
    puts "update_user_stats user_id #{user_id} post_count #{count}"
    user_stat = UserStat.find_or_initialize_by(user_id: user_id)
    user_stat.update_columns(post_count: count)

    # Fix: the original fetched the same User twice; look it up once and
    # compute the target trust level via the pure helper above.
    user = User.find(user_id)
    current_trust_level = user.trust_level || 1 # default to 1 if not set
    new_trust_level = trust_level_for_post_count(count)

    # Only update trust level if the new level is higher than the current one.
    if new_trust_level > current_trust_level
      user.update!(trust_level: new_trust_level)
      puts "update_user_stats user_id #{user_id} trust_level updated to #{new_trust_level}"
    else
      puts "update_user_stats user_id #{user_id} trust_level remains at #{current_trust_level}"
    end
  end
end
2024-06-17 09:37:13 +00:00
2024-06-17 09:37:32 +00:00
2024-06-26 05:26:26 +00:00
# Import personal messages from the gforum_Message table (both inbox and sent
# messages) as Discourse private-message topics and posts. Resumable: the
# highest processed msg_id is checkpointed after each message.
def import_personal_messages
  puts "Importing personal (inbox and sentmail) messages..."

  # Fetch the resume checkpoint; everything at or below it is skipped.
  highest_processed_personal_id = fetch_highest_processed_personal_id.to_i
  puts "Highest processed personal_id: #{highest_processed_personal_id}"
  # OVERRIDE - to speed getting to problem msg
  # # # highest_processed_personal_id = 1543840
  puts "Highest processed personal_id override: #{highest_processed_personal_id}"

  # Fix: ORDER BY msg_id -- the checkpoint/resume logic implicitly requires
  # messages to be processed in ascending msg_id order.
  execute_query("SELECT * FROM gforum_Message ORDER BY msg_id ASC").each do |row|
    begin
      msg_id = row['msg_id'].to_i
      puts "msg_id #{msg_id}"

      # Skip messages that have already been processed in a previous run.
      next if msg_id <= highest_processed_personal_id

      from_user_id = fetch_user_id_mapping(row['from_user_id_fk'])
      to_user_id = fetch_user_id_mapping(row['to_user_id_fk'])
      next unless from_user_id && to_user_id

      if TopicCustomField.exists?(name: 'original_gossamer_msg_id', value: row['msg_id'])
        # Already imported. Fix: advance the checkpoint here too, so re-runs
        # do not rescan messages that were imported before the checkpoint was
        # last written (previously only newly created messages advanced it).
        update_highest_processed_personal_id(msg_id)
        next
      end

      # Sanitize the message, ensuring we have an empty string or the content
      # without any \0; Discourse rejects empty/too-short message bodies.
      sanitized_message = sanitize_post_message(row['msg_body'])
      sanitized_message = "<No message body to migrate.>" if sanitized_message.strip.empty? || sanitized_message.split.size < 3
      # If we do not change the "min personal message post length" to 1, we
      # need this padding. ... We may need this anyway.
      sanitized_message = sanitized_message.ljust(10, '.') if sanitized_message.length < 10

      # Check and set a default title if the original title is nil or empty.
      sanitized_title = row['msg_subject']&.strip
      sanitized_title = "<no subject>" if sanitized_title.nil? || sanitized_title.empty?

      # Find an existing private-message topic between exactly these two users
      # whose title matches (with or without a leading "Re: ").
      topic = Topic.joins(:topic_allowed_users)
                   .where(archetype: Archetype.private_message)
                   .where("topics.title = ? OR topics.title = ?", sanitized_title, sanitized_title.gsub(/^Re: /, ""))
                   .where("topic_allowed_users.user_id IN (?)", [from_user_id, to_user_id])
                   .group("topics.id")
                   .having("COUNT(topic_allowed_users.user_id) = 2")
                   .first

      if topic.nil?
        puts "IMPORTING new message topic sanitized: #{sanitized_title} user_id #{from_user_id} to_user_id #{to_user_id}"
        # Create a private message topic in Discourse.
        topic = Topic.create!(
          title: sanitized_title,
          user_id: from_user_id,
          archetype: Archetype.private_message,
          created_at: Time.at(row['msg_time']),
          updated_at: Time.at(row['msg_time']),
          last_posted_at: Time.at(row['msg_time']),
          last_post_user_id: from_user_id
        )
        topic.custom_fields['original_gossamer_msg_id'] = row['msg_id']
        topic.save!
        # Both participants must be allowed on the private topic.
        topic.topic_allowed_users.create!(user_id: to_user_id)
        topic.topic_allowed_users.create!(user_id: from_user_id)
      else
        puts "APPENDING to existing message topic sanitized: #{sanitized_title} user_id #{from_user_id} to_user_id #{to_user_id}"
        # Increment the number of replies for the existing topic.
        topic.increment!(:posts_count)
      end

      # Create the message itself as a post in the private topic.
      post = Post.create!(
        topic_id: topic.id,
        user_id: from_user_id,
        raw: sanitized_message,
        created_at: Time.at(row['msg_time']),
        updated_at: Time.at(row['msg_time'])
      )
      post.custom_fields['original_gossamer_msg_id'] = row['msg_id']
      post.save!

      # Update the topic's last-reply information.
      topic.update!(
        last_posted_at: post.updated_at,
        last_post_user_id: post.user_id
      )

      update_highest_processed_personal_id(msg_id)

      # N/A. Attachments were never a thing for messages in Slowtwitch...
      # handle_post_attachments(row['msg_id'], post, from_user_id)
    rescue StandardError => e
      puts "Error importing message #{row['msg_id']}: #{e.message}"
    end
  end
end
2024-06-17 09:37:13 +00:00
2024-06-17 09:35:50 +00:00
2024-06-17 09:35:13 +00:00
# Main entry point: run the full Gossamer Forums -> Discourse migration.
def perform_import
  # Secret trick to disable RateLimiting protection in Discourse during the
  # bulk import.
  RateLimiter.disable

  # Point ActiveRecord/libpq at the external Discourse database.
  # SECURITY NOTE(review): credentials are hard-coded in source; move them to
  # the environment or a credentials store before this script is shared.
  ENV['PGHOST'] = '10.0.0.2'
  ENV['PGPORT'] = '5432'
  ENV['PGDATABASE'] = 'discourse'
  ENV['PGUSER'] = 'discourse'
  ENV['PGPASSWORD'] = 'nhB5FWhQkjdvaD2ViRNO63dQagDnzaTn'

  # Set our unique timestamp suffix for this migration run's output files.
  # Fix: no padding spaces in the format string -- they would leak into the
  # generated filenames below.
  timestamp = Time.now.strftime("-%Y%m%d%H%M%S")
  puts "Starting Gossamer Forums import... #{timestamp}"

  # Earlier one-off stages, kept commented out for reference / selective
  # re-runs:
  # add_former_user
  # import_users
  # generate_user_id_mapping
  # export_username_mapping_to_csv("/bitnami/discourse/sqlite/gossamer-migration-username-mapping#{timestamp}")
  # set_user_bio_images
  # import_categories
  # import_topics_and_posts_with_attachments

  threaded_topic_import

  update_topic_stats
  update_user_stats

  export_url_mapping_to_csv("/bitnami/discourse/sqlite/gossamer-migration-url-mapping#{timestamp}")
  create_nginx_rewrite_rules("/bitnami/discourse/sqlite/gossamer-redirects#{timestamp}.conf")

  import_personal_messages

  puts "Gossamer Forums import complete! #{timestamp}"
end
end
2024-06-17 09:35:13 +00:00
# Script entry point: instantiate the importer and run the full migration.
GossamerForumsImporter . new . perform_import
2024-06-17 09:32:40 +00:00