v0.19 Massive improvement of topic and post migration handling with extra field population

This commit is contained in:
David Sainty 2024-06-28 16:32:52 +10:00
parent 94d8264938
commit c7ce8fdf98

View File

@ -1,5 +1,5 @@
# gossamer threads migration-import code
# v0.17.2
# v0.19
require 'mysql2'
require 'open-uri'
@ -602,35 +602,81 @@ class GossamerForumsImporter < ImportScripts::Base
title
end
# Convert Gossamer tags to Discourse markdown
def convert_gossamer_tags_to_markdown(text)
text.gsub!(/\[b\](.*?)\[\/b\]/, '**\1**')
text.gsub!(/\[i\](.*?)\[\/i\]/, '*\1*')
text.gsub!(/\[img\](.*?)\[\/img\]/, '![image](\1)')
text.gsub!(/\[quote\](.*?)\[\/quote\]/m, '[quote]\1[/quote]')
text.gsub!(/\[quote (.*?)\](.*?)\[\/quote\]/m, '[quote=\1]\2[/quote]')
text.gsub!(/\[font "(.*?)"\](.*?)\[\/font\]/m, '\2') # Ignoring font changes
text.gsub!(/\[size (\d+)\](.*?)\[\/size\]/m, '\2') # Ignoring size changes
text
# # Convert Gossamer tags to Discourse markdown
# def convert_gossamer_tags_to_markdown(text)
# text.gsub!(/\[b\](.*?)\[\/b\]/, '**\1**')
# text.gsub!(/\[i\](.*?)\[\/i\]/, '*\1*')
# text.gsub!(/\[img\](.*?)\[\/img\]/, '![image](\1)')
# text.gsub!(/\[quote\](.*?)\[\/quote\]/m, '[quote]\1[/quote]')
# text.gsub!(/\[quote (.*?)\](.*?)\[\/quote\]/m, '[quote=\1]\2[/quote]')
# text.gsub!(/\[font "(.*?)"\](.*?)\[\/font\]/m, '\2') # Ignoring font changes
# text.gsub!(/\[size (\d+)\](.*?)\[\/size\]/m, '\2') # Ignoring size changes
# text
# end
# Sanitize a Gossamer post/message body and convert its markup into a
# Discourse-friendly format.
#
# Steps, in order:
#   1. Strip NUL bytes (nil input becomes an empty string).
#   2. Remove every [signature] placeholder along with one adjacent newline
#      on each side, as signatures are not supported in Discourse.
#   3. Convert the known Gossamer tags ([b], [i], [u], [quote], [img],
#      [url=...]) and drop the wrappers of [size=], [color=] and [font=].
#   4. Remove any remaining bracketed tag, while keeping the quote tags and
#      the markdown links/images produced in step 3.
#
# message - String (or nil) holding the raw Gossamer body.
#
# Returns a new String; the argument itself is never mutated.
def sanitize_post_message(message)
  # tr always returns a fresh, unfrozen copy, so the caller's string (or a
  # frozen literal) is never modified in place.
  text = message.to_s.tr("\0", '')

  # Use the return value of gsub: the non-bang form does not modify its
  # receiver, so discarding the result would leave the newlines in place.
  text = text.gsub(/\n?\[signature\]\n?/, '')

  # Gossamer tag => replacement, applied in this order.
  conversions = [
    [/\[b\](.*?)\[\/b\]/, '**\1**'],
    [/\[i\](.*?)\[\/i\]/, '*\1*'],
    [/\[u\](.*?)\[\/u\]/, '<u>\1</u>'],
    [/\[quote\](.*?)\[\/quote\]/m, '[quote]\1[/quote]'],
    [/\[quote\s+user=(.*?)\](.*?)\[\/quote\]/m, '[quote \1]\2[/quote]'],
    [/\[img\](.*?)\[\/img\]/, '![image](\1)'],
    [/\[url=(.*?)\](.*?)\[\/url\]/, '[\2](\1)'],
    [/\[size=(.*?)\](.*?)\[\/size\]/, '\2'],
    [/\[color=(.*?)\](.*?)\[\/color\]/, '\2'],
    [/\[font=(.*?)\](.*?)\[\/font\]/, '\2']
  ]
  conversions.each do |pattern, replacement|
    text = text.gsub(pattern, replacement)
  end

  # Remove unsupported tags. Two things must survive this pass, otherwise the
  # conversions above are undone:
  #   * [quote ...] / [/quote] tags            -> (?!/?quote\b)
  #   * the "[label]" part of "[label](target)" -> (?!\() after the bracket
  text.gsub(%r{\[(?!/?quote\b)[^\[\]\n]*\](?!\()}, '')
end
# Look up the view counter stored for a Gossamer post.
#
# Reads the post_views column of the gforum_PostView row whose post_id_fk
# equals post_id (at most one row is requested).
#
# Returns the row's post_views value, or 0 when the query yields no row.
def fetch_post_views(post_id)
  sql = "SELECT post_views FROM gforum_PostView WHERE post_id_fk = #{post_id} LIMIT 1"
  view_row = execute_query(sql).first
  return 0 unless view_row

  view_row['post_views']
end
# Import topics and posts from Gossamer Forums to Discourse
def import_topics_and_posts_with_attachments
puts "Importing topics and posts with attachments..."
topic_last_post_time = {}
topic_post_count = Hash.new(0)
user_topic_count = Hash.new(0)
user_post_count = Hash.new(0)
topic_last_post_user = {}
topic_post_numbers = Hash.new { |hash, key| hash[key] = 0 }
# Execute the query to get all posts ordered by post_id
execute_query("SELECT * FROM gforum_Post ORDER BY post_id").each do |row|
puts "post_id #{row['post_id']} post_root_id #{row['post_root_id']} post_subject/title #{row['post_subject']} forum_id_fk/category_id #{row['forum_id_fk']}"
# discourse_user_id = @user_id_map[row['user_id_fk']]
# discourse_user_id = @user_id_map[row['user_id_fk']]
# Fetch the Discourse user and category ID mappings
discourse_user_id = fetch_user_id_mapping(row['user_id_fk'])
discourse_category_id = fetch_category_id_mapping(row['forum_id_fk'])
puts "discourse_user_id #{discourse_user_id} discourse_category_id #{discourse_category_id}"
next unless discourse_user_id && discourse_category_id
# Check if the post is a topic (post_root_id == 0)
if row['post_root_id'] == 0
puts "#1"
# Ensure the title is valid
title = ensure_valid_title(row['post_subject'])
# Confirm the number of views the post has had
post_views = fetch_post_views(row['post_id'])
# Skip if the topic already exists
unless TopicCustomField.exists?(name: 'original_gossamer_id', value: row['post_id'])
@ -642,20 +688,40 @@ class GossamerForumsImporter < ImportScripts::Base
title: title,
user_id: discourse_user_id,
created_at: Time.at(row['post_time']),
updated_at: Time.at(row['post_latest_reply']),
category_id: discourse_category_id
updated_at: Time.at(row['post_time']),
category_id: discourse_category_id,
views: post_views || 0,
posts_count: 0
)
topic.custom_fields['original_gossamer_id'] = row['post_id']
topic.save!
# Track last post time and user for the topic
topic_last_post_time[topic.id] = Time.at(row['post_time'])
topic_last_post_user[topic.id] = discourse_user_id
# Increment the count of the number of topics created by each user
user_topic_count[discourse_user_id] += 1
# Create the initial post in the topic
# # Ensure the raw post string contents itself is acceptable to Discourse
# sanitized_post_message = row['post_message']&.tr("\0", '') || ""
#
# # Convert Gossamer tags to Discourse markdown
# sanitized_post_message = convert_gossamer_tags_to_markdown(sanitized_post_message)
#
# # Remove the [signature] label from appearing at the end of the messages after import
# sanitized_post_message.sub(/\n?\[signature\]\n?\z/, '')
# sanitized_post_message.gsub(/\n?\[signature\]\n?/, '')
# Sanitize the post message
sanitized_post_message = sanitize_post_message(row['post_message'])
puts "CREATE POST topic.id #{topic.id} discourse_user_id #{discourse_user_id}"
# Ensure the raw post string contents itself is acceptable to Discourse
sanitized_post_message = row['post_message']&.tr("\0", '') || ""
# Remove the [signature] label from appearing at the end of the messages after import
sanitized_post_message.sub(/\n?\[signature\]\n?\z/, '')
# Increment the number of posts in the given topic.
topic_post_numbers[topic.id] += 1
# Create the initial post in the topic
post = Post.create!(
topic_id: topic.id,
user_id: discourse_user_id,
@ -663,11 +729,18 @@ class GossamerForumsImporter < ImportScripts::Base
# raw: row['post_message'] || "",
raw: sanitized_post_message,
created_at: Time.at(row['post_time']),
updated_at: Time.at(row['post_latest_reply'])
updated_at: Time.at(row['post_time']),
like_count: row['post_likes'] || 0,
reads: post_views || 0,
post_number: topic_post_numbers[topic.id]
)
post.custom_fields['original_gossamer_id'] = row['post_id']
post.save!
# Track the number of posts in the topic and by the user
topic_post_count[topic.id] += 1
user_post_count[discourse_user_id] += 1
# Handle attachments for the post
handle_post_attachments(row['post_id'], post, discourse_user_id)
@ -682,6 +755,10 @@ class GossamerForumsImporter < ImportScripts::Base
end
else
puts "#3"
# Confirm the number of views the post has had
post_views = fetch_post_views(row['post_id'])
# Find the root topic for the post
root_topic_field = TopicCustomField.find_by(name: 'original_gossamer_id', value: row['post_root_id'])
@ -696,14 +773,12 @@ class GossamerForumsImporter < ImportScripts::Base
begin
puts "#4"
# Ensure the raw post string contents itself is acceptable to Discourse
sanitized_post_message = row['post_message']&.tr("\0", '') || ""
# Convert Gossamer tags to Discourse markdown
sanitized_post_message = convert_gossamer_tags_to_markdown(sanitized_post_message)
# Sanitize the post message
sanitized_post_message = sanitize_post_message(row['post_message'])
# Create the post in the existing topic
topic_post_numbers[topic_id] += 1
# Remove the [signature] label from appearing at the end of the messages after import
sanitized_post_message.sub(/\n?\[signature\]\n?\z/, '')
post = Post.create!(
topic_id: topic_id,
user_id: discourse_user_id,
@ -711,14 +786,28 @@ class GossamerForumsImporter < ImportScripts::Base
# raw: row['post_message'] || "",
raw: sanitized_post_message,
created_at: Time.at(row['post_time']),
updated_at: Time.at(row['post_latest_reply']),
reply_to_post_number: reply_to_post_number
updated_at: Time.at(row['post_time']),
reply_to_post_number: reply_to_post_number,
like_count: row['post_replies'] || 0,
reads: post_views || topic_post_count[topic_id],
post_number: topic_post_numbers[topic_id]
)
post.custom_fields['original_gossamer_id'] = row['post_id']
post.save!
# Track the number of posts in the topic and by the user
topic_post_count[topic_id] += 1
user_post_count[discourse_user_id] += 1
# Update last post time and user for the topic
if topic_last_post_time[topic_id].nil? || Time.at(row['post_time']) > topic_last_post_time[topic_id]
topic_last_post_time[topic_id] = Time.at(row['post_time'])
topic_last_post_user[topic_id] = discourse_user_id
end
# Handle attachments for the post
handle_post_attachments(row['post_id'], post, discourse_user_id)
rescue ActiveRecord::RecordInvalid => e
puts "Error importing post with post_id #{row['post_id']}: #{e.message}"
end
@ -727,6 +816,30 @@ class GossamerForumsImporter < ImportScripts::Base
end
end
end
# Update topics with the correct last post time, post count, and last post user
topic_last_post_time.each do |topic_id, last_post_time|
Topic.find(topic_id).update!(
updated_at: last_post_time,
posts_count: topic_post_count[topic_id],
last_posted_at: last_post_time,
bumped_at: last_post_time,
last_post_user_id: topic_last_post_user[topic_id]
)
end
# Update user profiles with the number of topics and posts created
user_topic_count.each do |user_id, count|
user = User.find(user_id)
user.update!(topic_count: count)
end
user_post_count.each do |user_id, count|
user = User.find(user_id)
user.update!(post_count: count)
end
puts "Importing topics and posts with attachments... Done."
end
@ -744,7 +857,7 @@ class GossamerForumsImporter < ImportScripts::Base
unless TopicCustomField.exists?(name: 'original_gossamer_msg_id', value: row['msg_id'])
# Sanitize the message, ensuring we have an empty string or the content without any \0
sanitized_message = row['msg_body']&.tr("\0", '') || ""
sanitized_message = sanitize_post_message(row['msg_body'])
# Set default message body if the sanitized message is blank
sanitized_message = " " if sanitized_message.strip.empty?
@ -752,9 +865,6 @@ class GossamerForumsImporter < ImportScripts::Base
# # If we do not change the "min personal message post length" to 1, we need this.
# sanitized_message = sanitized_message.ljust(10, ' ') if sanitized_message.length < 10
# Convert potential Gossamer tags to Discourse markdown
sanitized_message = convert_gossamer_tags_to_markdown(sanitized_message)
# Check and set a default title if the original title is nil or empty
title = row['msg_subject']&.strip
title = "<no subject>" if title.nil? || title.empty?