v0.22 Fix incremental support for post handling

This commit is contained in:
David Sainty 2024-07-01 16:08:07 +10:00
parent 752499806a
commit dd5f3a9333

View File

@ -1,5 +1,5 @@
# gossamer threads migration-import code
# v0.21.5
# v0.22
require 'mysql2'
require 'open-uri'
@ -109,6 +109,12 @@ class GossamerForumsImporter < ImportScripts::Base
post_number INTEGER DEFAULT 0
);
SQL
@db.execute <<-SQL
CREATE TABLE IF NOT EXISTS highest_processed_post_id (
id INTEGER PRIMARY KEY CHECK (id = 1),
post_id INTEGER
);
SQL
end
def insert_user_id_mapping(old_user_id, new_user_id)
@ -172,6 +178,11 @@ class GossamerForumsImporter < ImportScripts::Base
end
end
# Look up the largest old_post_id currently stored in the url_map table.
# Returns whatever @db.get_first_value yields for the MAX() aggregate
# (callers in this file coerce the result with to_i).
def fetch_highest_old_post_id
max_id_sql = "SELECT MAX(old_post_id) FROM url_map"
@db.get_first_value(max_id_sql)
end
# Helper methods to interact with the SQLite database for persisting topic-post related values
def fetch_db(table, key)
@db.get_first_value "SELECT value FROM #{table} WHERE key = ?", key
@ -229,6 +240,16 @@ class GossamerForumsImporter < ImportScripts::Base
@db.execute "INSERT OR REPLACE INTO topic_post_numbers (topic_id, post_number) VALUES (?, ?)", topic_id, post_number
end
# Read the stored post_id from the row with id = 1 of the
# highest_processed_post_id table.
# Returns whatever @db.get_first_value yields for that query.
def fetch_highest_processed_post_id
lookup_sql = "SELECT post_id FROM highest_processed_post_id WHERE id = 1"
@db.get_first_value(lookup_sql)
end
# Persist post_id as the highest processed post.
# Writes the row with id = 1 in highest_processed_post_id, replacing any
# existing row with that id (INSERT OR REPLACE).
#
# post_id - value bound to the post_id column.
#
# Returns the result of @db.execute.
def update_highest_processed_post_id(post_id)
upsert_sql = "INSERT OR REPLACE INTO highest_processed_post_id (id, post_id) VALUES (1, ?)"
@db.execute(upsert_sql, post_id)
end
# Execute an SQL query on the Gossamer Forums database
def execute_query(query)
@ -762,9 +783,23 @@ class GossamerForumsImporter < ImportScripts::Base
# topic_last_post_user = {}
# topic_post_numbers = Hash.new { |hash, key| hash[key] = 0 }
# Fetch the highest old_post_id from the url_map table
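# FIXME: interim resume point taken from url_map; see the commented-out highest_processed_post_id check in the loop below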
highest_old_post_id = fetch_highest_old_post_id.to_i
puts "Highest (OP) old_post_id in url_map: #{highest_old_post_id}"
highest_processed_post_id = fetch_highest_processed_post_id.to_i
puts "Highest processed post_id: #{highest_processed_post_id}"
# Execute the query to get all posts ordered by post_id
execute_query("SELECT * FROM gforum_Post ORDER BY post_id").each do |row|
puts "post_id #{row['post_id']} post_root_id #{row['post_root_id']} post_subject/title #{row['post_subject']} forum_id_fk/category_id #{row['forum_id_fk']}"
post_id = row['post_id'].to_i
# Skip posts that have already been processed
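# FIXME: temporary - this skips by the highest old_post_id in url_map; future runs should presumably use the highest_processed_post_id check below instead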
next if post_id < highest_old_post_id
# next if post_id <= highest_processed_post_id
puts "Processing post_id #{row['post_id']} post_id #{row['post_root_id']} post_subject/title #{row['post_subject']} forum_id_fk/category_id #{row['forum_id_fk']}"
# discourse_user_id = @user_id_map[row['user_id_fk']]
# Fetch the Discourse user and category IP mappings
@ -860,6 +895,10 @@ class GossamerForumsImporter < ImportScripts::Base
# old_url = "https://old/forum/#{row['forum_name']}/topics/#{row['post_id']}"
new_url = "https://new/t/#{topic.slug}/#{topic.id}"
insert_url_mapping(row['post_id'], new_url, title)
# Update the highest processed post_id
puts "Updated highest processed post_id #{post_id}"
update_highest_processed_post_id(post_id)
rescue ActiveRecord::RecordInvalid => e
puts "Error importing topic with post_id #{row['post_id']}: #{e.message}"
@ -927,6 +966,10 @@ class GossamerForumsImporter < ImportScripts::Base
# Handle attachments for the post
handle_post_attachments(row['post_id'], post, discourse_user_id)
# Update the highest processed post_id
puts "Updated highest processed post_id #{post_id}"
update_highest_processed_post_id(post_id)
rescue ActiveRecord::RecordInvalid => e
puts "Error importing post with post_id #{row['post_id']}: #{e.message}"