v0.44 Try removing connection_pool and starting MySQL within the thread method, and fix handle_post_attachments

This commit is contained in:
David Sainty 2024-08-18 04:39:51 +10:00
parent 798153c8fc
commit 90a2d2fa37

View File

@ -1,7 +1,7 @@
# Federated Computer, Inc. # Federated Computer, Inc.
# David Sainty <saint@federated.computer> 2024 A.D. # David Sainty <saint@federated.computer> 2024 A.D.
# Gossamer Threads to Discourse -- Migration-Import Script # Gossamer Threads to Discourse -- Migration-Import Script
# v0.43 Move to connection pooling for MySQL problem # v0.44 Try removing connection_pool and starting MySQL within thread method and fix handle_post_attachment
require 'mysql2' require 'mysql2'
require 'open-uri' require 'open-uri'
@ -490,8 +490,8 @@ class GossamerForumsImporter < ImportScripts::Base
# Helper method to handle post attachments # Helper method to handle post attachments
def handle_post_attachments(gossamer_post_id, post, user_id) def handle_post_attachments(gossamer_post_id, post, user_id, mysql_client)
execute_query("SELECT * FROM gforum_PostAttachment WHERE post_id_fk = #{gossamer_post_id}").each do |att_row| execute_query_concurrent("SELECT * FROM gforum_PostAttachment WHERE post_id_fk = #{gossamer_post_id}", mysql_client).each do |att_row|
attachment_url = "https://forum.slowtwitch.com/forum/?do=post_attachment;postatt_id=#{att_row['postatt_id']}" attachment_url = "https://forum.slowtwitch.com/forum/?do=post_attachment;postatt_id=#{att_row['postatt_id']}"
puts "Handling attachment: #{attachment_url}" puts "Handling attachment: #{attachment_url}"
attachment_data = download_attachment(attachment_url) attachment_data = download_attachment(attachment_url)
@ -1068,15 +1068,16 @@ class GossamerForumsImporter < ImportScripts::Base
# Get list of TOPICS / OP posts, i.e. post ids that have no parent / root id - SELECT post_id FROM gforum_Post WHERE post_root_id = 0; # Get list of TOPICS / OP posts, i.e. post ids that have no parent / root id - SELECT post_id FROM gforum_Post WHERE post_root_id = 0;
def threaded_topic_import def threaded_topic_import
# Define the connection pool inside the method
mariadb_pool = ConnectionPool.new(size: 20, timeout: 500) do ### # Define the connection pool inside the method
Mysql2::Client.new( ### mariadb_pool = ConnectionPool.new(size: 20, timeout: 500) do
host: "slowtwitch.northend.network", ### Mysql2::Client.new(
username: "admin", ### host: "slowtwitch.northend.network",
password: "yxnh93Ybbz2Nm8#mp28zCVv", ### username: "admin",
database: "slowtwitch" ### password: "yxnh93Ybbz2Nm8#mp28zCVv",
) ### database: "slowtwitch"
end ### )
### end
# The query selects post_ids from gforum_Post where post_root_id is 0, meaning these posts are the topic starters (OPs). # The query selects post_ids from gforum_Post where post_root_id is 0, meaning these posts are the topic starters (OPs).
# Execute the query and fetch the result # Execute the query and fetch the result
@ -1145,7 +1146,7 @@ class GossamerForumsImporter < ImportScripts::Base
#### puts "PP 22 -- #{post_id}" #### puts "PP 22 -- #{post_id}"
begin begin
mariadb_pool.with do |mysql_client| ##### mariadb_pool.with do |mysql_client|
puts "PP 11 -- #{post_id} -- Checking MySQL connections status.." puts "PP 11 -- #{post_id} -- Checking MySQL connections status.."
#### puts " FIRST Checking MySQL connection status..." #### puts " FIRST Checking MySQL connection status..."
@ -1160,7 +1161,7 @@ class GossamerForumsImporter < ImportScripts::Base
post_status = post_status(post_id) post_status = post_status(post_id)
if post_status.nil? || post_status == 0 if post_status.nil? || post_status == 0
puts "Starting import for post_id #{post_id}" puts "Starting import for post_id #{post_id}"
topic_import_job(post_id, mysql_client, sqlite_mutex) # Import topic and its replies topic_import_job(post_id, sqlite_mutex) # Import topic and its replies
sqlite_mutex.synchronize do sqlite_mutex.synchronize do
mark_post_as_complete(post_id) # Mark as complete in SQLite table mark_post_as_complete(post_id) # Mark as complete in SQLite table
end end
@ -1168,7 +1169,7 @@ class GossamerForumsImporter < ImportScripts::Base
puts "Skipping post_id #{post_id}, already processed." puts "Skipping post_id #{post_id}, already processed."
end end
end end
end ##### end
rescue => e rescue => e
puts "Error processing post ID #{post_id}: #{e.message}" puts "Error processing post ID #{post_id}: #{e.message}"
sqlite_mutex.synchronize do sqlite_mutex.synchronize do
@ -1216,12 +1217,21 @@ class GossamerForumsImporter < ImportScripts::Base
# end # end
# Method to import an entire topic, including its first post and all subsequent replies # Method to import an entire topic, including its first post and all subsequent replies
def topic_import_job(post_id, mysql_client, sqlite_mutex) ##### def topic_import_job(post_id, mysql_client, sqlite_mutex)
#Here is where you can import the entire topic def topic_import_job(post_id, sqlite_mutex)
#Get post -- SELECT post_id, user_id_fk, forum_id_fk, post_root_id, post_subject, post_time, post_message, post_father_id, post_replies FROM gforum_Post WHERE post_id = post_id puts "TIJ ZZ post_id #{post_id}"
#check if exists, create if not mysql_client = Mysql2::Client.new(
#get children, create -- SELECT post_id, user_id_fk, forum_id_fk, post_root_id, post_subject, post_time, post_message, post_father_id, post_replies FROM gforum_Post WHERE post_root_id = post_id host: "slowtwitch.northend.network",
#this parts needs to be synchronously to avoid race conditions username: "admin",
password: "yxnh93Ybbz2Nm8#mp28zCVv",
database: "slowtwitch"
)
puts " FIRST Checking MySQL connection status..."
if mysql_client.query('SELECT 1').nil?
puts " MySQL connection is not valid"
else
puts " MySQL connection is valid"
end
puts "TIJ AA post_id #{post_id}" puts "TIJ AA post_id #{post_id}"
# Fetch the post data for the given post_id (this is the first post in the topic) # Fetch the post data for the given post_id (this is the first post in the topic)
row = execute_query_concurrent("SELECT post_id, user_id_fk, forum_id_fk, post_root_id, post_subject, post_time, post_message, post_father_id, post_replies FROM gforum_Post WHERE post_id = #{post_id}", mysql_client).first row = execute_query_concurrent("SELECT post_id, user_id_fk, forum_id_fk, post_root_id, post_subject, post_time, post_message, post_father_id, post_replies FROM gforum_Post WHERE post_id = #{post_id}", mysql_client).first
@ -1330,7 +1340,7 @@ class GossamerForumsImporter < ImportScripts::Base
puts "TIJ HH post_id #{post_id}" puts "TIJ HH post_id #{post_id}"
# Handle any attachments associated with the post # Handle any attachments associated with the post
handle_post_attachments(row['post_id'], post, discourse_user_id) handle_post_attachments(row['post_id'], post, discourse_user_id, mysql_client)
# Create URL mappings for the new topic # Create URL mappings for the new topic
new_url = "https://new/t/#{topic.slug}/#{topic.id}" new_url = "https://new/t/#{topic.slug}/#{topic.id}"
@ -1386,7 +1396,7 @@ class GossamerForumsImporter < ImportScripts::Base
end end
# Handle any attachments associated with the reply # Handle any attachments associated with the reply
handle_post_attachments(reply_row['post_id'], post, reply_user_id) handle_post_attachments(reply_row['post_id'], post, reply_user_id, mysql_client)
# # Update the highest processed post_id in the database (thread-safe) # # Update the highest processed post_id in the database (thread-safe)
# update_highest_processed_post_id_thread_safe(reply_row['post_id']) # update_highest_processed_post_id_thread_safe(reply_row['post_id'])
@ -1408,6 +1418,9 @@ class GossamerForumsImporter < ImportScripts::Base
else else
puts "Topic for post_id #{row['post_id']} already exists, skipping creation." puts "Topic for post_id #{row['post_id']} already exists, skipping creation."
end end
puts " LAST Removing MySQL connection"
mysql_client.close # if mysql_client
end end