feat: keep seeding fast and predictable even when parallelized

This commit is contained in:
2025-11-21 16:16:35 +01:00
parent a5b4e9bf25
commit 65576cc623
2 changed files with 133 additions and 47 deletions

View File

@@ -2,9 +2,9 @@ package commands
import ( import (
"context" "context"
"crypto/rand" cryptoRand "crypto/rand"
"fmt" "fmt"
"math/big" "math/rand"
"runtime" "runtime"
"sync" "sync"
"time" "time"
@@ -17,14 +17,24 @@ type ParallelProcessor struct {
maxWorkers int maxWorkers int
timeout time.Duration timeout time.Duration
passwordHash string passwordHash string
randSource *rand.Rand
randMu sync.Mutex
} }
// NewParallelProcessor builds a ParallelProcessor with a bounded worker count,
// a fixed timeout and (in the right-hand, post-commit column) its own
// math/rand generator. Each line of this hunk shows the old revision on the
// left and the new revision on the right.
func NewParallelProcessor() *ParallelProcessor { func NewParallelProcessor() *ParallelProcessor {
// Worker count follows runtime.NumCPU(), clamped to the range [2, 8].
maxWorkers := max(min(runtime.NumCPU(), 8), 2) maxWorkers := max(min(runtime.NumCPU(), 8), 2)
// The seed defaults to the wall clock in nanoseconds and is replaced by eight
// crypto/rand bytes (combined most-significant byte first) when that read
// succeeds.
seed := time.Now().UnixNano()
seedBytes := make([]byte, 8)
if _, err := cryptoRand.Read(seedBytes); err == nil {
seed = int64(seedBytes[0])<<56 | int64(seedBytes[1])<<48 | int64(seedBytes[2])<<40 | int64(seedBytes[3])<<32 |
int64(seedBytes[4])<<24 | int64(seedBytes[5])<<16 | int64(seedBytes[6])<<8 | int64(seedBytes[7])
}
return &ParallelProcessor{ return &ParallelProcessor{
maxWorkers: maxWorkers, maxWorkers: maxWorkers,
// The commit raises the timeout from 30 to 60 seconds.
timeout: 30 * time.Second, timeout: 60 * time.Second,
randSource: rand.New(rand.NewSource(seed)),
} }
} }
@@ -73,8 +83,21 @@ func (p *ParallelProcessor) CreateUsersInParallel(userRepo repositories.UserRepo
users := make([]database.User, count) users := make([]database.User, count)
completed := 0 completed := 0
firstError := make(chan error, 1)
for { go func() {
for err := range errors {
if err != nil {
select {
case firstError <- err:
default:
}
return
}
}
}()
for completed < count {
select { select {
case result, ok := <-results: case result, ok := <-results:
if !ok { if !ok {
@@ -85,14 +108,14 @@ func (p *ParallelProcessor) CreateUsersInParallel(userRepo repositories.UserRepo
if progress != nil { if progress != nil {
progress.Update(completed) progress.Update(completed)
} }
case err := <-errors: case err := <-firstError:
if err != nil {
return nil, err return nil, err
}
case <-ctx.Done(): case <-ctx.Done():
return nil, fmt.Errorf("timeout creating users: %w", ctx.Err()) return nil, fmt.Errorf("timeout creating users: %w", ctx.Err())
} }
} }
return users, nil
} }
func (p *ParallelProcessor) CreatePostsInParallel(postRepo repositories.PostRepository, authorID uint, count int, progress *ProgressIndicator) ([]database.Post, error) { func (p *ParallelProcessor) CreatePostsInParallel(postRepo repositories.PostRepository, authorID uint, count int, progress *ProgressIndicator) ([]database.Post, error) {
@@ -136,8 +159,21 @@ func (p *ParallelProcessor) CreatePostsInParallel(postRepo repositories.PostRepo
posts := make([]database.Post, count) posts := make([]database.Post, count)
completed := 0 completed := 0
firstError := make(chan error, 1)
for { go func() {
for err := range errors {
if err != nil {
select {
case firstError <- err:
default:
}
return
}
}
}()
for completed < count {
select { select {
case result, ok := <-results: case result, ok := <-results:
if !ok { if !ok {
@@ -148,14 +184,14 @@ func (p *ParallelProcessor) CreatePostsInParallel(postRepo repositories.PostRepo
if progress != nil { if progress != nil {
progress.Update(completed) progress.Update(completed)
} }
case err := <-errors: case err := <-firstError:
if err != nil {
return nil, err return nil, err
}
case <-ctx.Done(): case <-ctx.Done():
return nil, fmt.Errorf("timeout creating posts: %w", ctx.Err()) return nil, fmt.Errorf("timeout creating posts: %w", ctx.Err())
} }
} }
return posts, nil
} }
func (p *ParallelProcessor) CreateVotesInParallel(voteRepo repositories.VoteRepository, users []database.User, posts []database.Post, avgVotesPerPost int, progress *ProgressIndicator) (int, error) { func (p *ParallelProcessor) CreateVotesInParallel(voteRepo repositories.VoteRepository, users []database.User, posts []database.Post, avgVotesPerPost int, progress *ProgressIndicator) (int, error) {
@@ -199,8 +235,21 @@ func (p *ParallelProcessor) CreateVotesInParallel(voteRepo repositories.VoteRepo
totalVotes := 0 totalVotes := 0
completed := 0 completed := 0
firstError := make(chan error, 1)
for { go func() {
for err := range errors {
if err != nil {
select {
case firstError <- err:
default:
}
return
}
}
}()
for completed < len(posts) {
select { select {
case result, ok := <-results: case result, ok := <-results:
if !ok { if !ok {
@@ -211,14 +260,14 @@ func (p *ParallelProcessor) CreateVotesInParallel(voteRepo repositories.VoteRepo
if progress != nil { if progress != nil {
progress.Update(completed) progress.Update(completed)
} }
case err := <-errors: case err := <-firstError:
if err != nil {
return 0, err return 0, err
}
case <-ctx.Done(): case <-ctx.Done():
return 0, fmt.Errorf("timeout creating votes: %w", ctx.Err()) return 0, fmt.Errorf("timeout creating votes: %w", ctx.Err())
} }
} }
return totalVotes, nil
} }
func (p *ParallelProcessor) UpdatePostScoresInParallel(postRepo repositories.PostRepository, voteRepo repositories.VoteRepository, posts []database.Post, progress *ProgressIndicator) error { func (p *ParallelProcessor) UpdatePostScoresInParallel(postRepo repositories.PostRepository, voteRepo repositories.VoteRepository, posts []database.Post, progress *ProgressIndicator) error {
@@ -284,19 +333,20 @@ type voteResult struct {
index int index int
} }
// generateRandomIdentifier returns a 12-character identifier drawn from
// lowercase ASCII letters and digits. The left column is the old free function
// backed by crypto/rand; the right column is the new method, which draws from
// the processor's own p.randSource instead.
func generateRandomIdentifier() string { func (p *ParallelProcessor) generateRandomIdentifier() string {
const length = 12 const length = 12
const chars = "abcdefghijklmnopqrstuvwxyz0123456789" const chars = "abcdefghijklmnopqrstuvwxyz0123456789"
identifier := make([]byte, length) identifier := make([]byte, length)
// New revision: p.randMu is held across the whole loop, so all twelve draws
// from p.randSource happen inside one critical section.
p.randMu.Lock()
for i := range identifier { for i := range identifier {
num, _ := rand.Int(rand.Reader, big.NewInt(int64(len(chars)))) identifier[i] = chars[p.randSource.Intn(len(chars))]
identifier[i] = chars[num.Int64()]
} }
p.randMu.Unlock()
return string(identifier) return string(identifier)
} }
func (p *ParallelProcessor) createSingleUser(userRepo repositories.UserRepository, index int) (database.User, error) { func (p *ParallelProcessor) createSingleUser(userRepo repositories.UserRepository, index int) (database.User, error) {
randomID := generateRandomIdentifier() randomID := p.generateRandomIdentifier()
username := fmt.Sprintf("user_%s", randomID) username := fmt.Sprintf("user_%s", randomID)
email := fmt.Sprintf("user_%s@goyco.local", randomID) email := fmt.Sprintf("user_%s@goyco.local", randomID)
@@ -315,7 +365,7 @@ func (p *ParallelProcessor) createSingleUser(userRepo repositories.UserRepositor
} }
if err := userRepo.Create(user); err != nil { if err := userRepo.Create(user); err != nil {
randomID = generateRandomIdentifier() randomID = p.generateRandomIdentifier()
username = fmt.Sprintf("user_%s", randomID) username = fmt.Sprintf("user_%s", randomID)
email = fmt.Sprintf("user_%s@goyco.local", randomID) email = fmt.Sprintf("user_%s@goyco.local", randomID)
continue continue
@@ -370,7 +420,7 @@ func (p *ParallelProcessor) createSinglePost(postRepo repositories.PostRepositor
} }
domain := sampleDomains[index%len(sampleDomains)] domain := sampleDomains[index%len(sampleDomains)]
randomID := generateRandomIdentifier() randomID := p.generateRandomIdentifier()
path := fmt.Sprintf("/article/%s", randomID) path := fmt.Sprintf("/article/%s", randomID)
url := fmt.Sprintf("https://%s%s", domain, path) url := fmt.Sprintf("https://%s%s", domain, path)
@@ -389,7 +439,7 @@ func (p *ParallelProcessor) createSinglePost(postRepo repositories.PostRepositor
} }
if err := postRepo.Create(post); err != nil { if err := postRepo.Create(post); err != nil {
randomID = generateRandomIdentifier() randomID = p.generateRandomIdentifier()
path = fmt.Sprintf("/article/%s", randomID) path = fmt.Sprintf("/article/%s", randomID)
url = fmt.Sprintf("https://%s%s", domain, path) url = fmt.Sprintf("https://%s%s", domain, path)
continue continue
@@ -402,31 +452,37 @@ func (p *ParallelProcessor) createSinglePost(postRepo repositories.PostRepositor
} }
func (p *ParallelProcessor) createVotesForPost(voteRepo repositories.VoteRepository, users []database.User, post database.Post, avgVotesPerPost int) (int, error) { func (p *ParallelProcessor) createVotesForPost(voteRepo repositories.VoteRepository, users []database.User, post database.Post, avgVotesPerPost int) (int, error) {
voteCount, _ := rand.Int(rand.Reader, big.NewInt(int64(avgVotesPerPost*2)+1)) p.randMu.Lock()
numVotes := int(voteCount.Int64()) numVotes := p.randSource.Intn(avgVotesPerPost*2 + 1)
p.randMu.Unlock()
if numVotes == 0 && avgVotesPerPost > 0 { if numVotes == 0 && avgVotesPerPost > 0 {
chance, _ := rand.Int(rand.Reader, big.NewInt(5)) p.randMu.Lock()
if chance.Int64() > 0 { if p.randSource.Intn(5) > 0 {
numVotes = 1 numVotes = 1
} }
p.randMu.Unlock()
} }
totalVotes := 0 totalVotes := 0
usedUsers := make(map[uint]bool) usedUsers := make(map[uint]bool)
for i := 0; i < numVotes && len(usedUsers) < len(users); i++ { for i := 0; i < numVotes && len(usedUsers) < len(users); i++ {
userIdx, _ := rand.Int(rand.Reader, big.NewInt(int64(len(users)))) p.randMu.Lock()
user := users[userIdx.Int64()] userIdx := p.randSource.Intn(len(users))
p.randMu.Unlock()
user := users[userIdx]
if usedUsers[user.ID] { if usedUsers[user.ID] {
continue continue
} }
usedUsers[user.ID] = true usedUsers[user.ID] = true
voteTypeInt, _ := rand.Int(rand.Reader, big.NewInt(10)) p.randMu.Lock()
voteTypeInt := p.randSource.Intn(10)
p.randMu.Unlock()
var voteType database.VoteType var voteType database.VoteType
if voteTypeInt.Int64() < 7 { if voteTypeInt < 7 {
voteType = database.VoteUp voteType = database.VoteUp
} else { } else {
voteType = database.VoteDown voteType = database.VoteDown

View File

@@ -1,13 +1,15 @@
package commands package commands
import ( import (
"crypto/rand" cryptoRand "crypto/rand"
"errors" "errors"
"flag" "flag"
"fmt" "fmt"
"math/big" "math/rand"
"os" "os"
"strings" "strings"
"sync"
"time"
"goyco/internal/config" "goyco/internal/config"
"goyco/internal/database" "goyco/internal/database"
@@ -17,6 +19,34 @@ import (
"gorm.io/gorm" "gorm.io/gorm"
) )
// Package-level random state used by the seed helpers in this file
// (generateRandomIdentifier, generateRandomPath, createRandomVotes).
var (
// seedRandSource is the math/rand generator assigned by initSeedRand.
seedRandSource *rand.Rand
// seedRandOnce makes initSeedRand build seedRandSource only a single time.
seedRandOnce sync.Once
)
// initSeedRand lazily constructs the package-level seedRandSource; repeated
// calls are no-ops thanks to seedRandOnce.
//
// The seed starts as the current wall-clock time in nanoseconds. If eight
// bytes can be read from crypto/rand, they replace that value, folded together
// with the first byte as the most significant.
func initSeedRand() {
	seedRandOnce.Do(func() {
		var entropy [8]byte
		seedValue := time.Now().UnixNano()
		if _, readErr := cryptoRand.Read(entropy[:]); readErr == nil {
			// Fold the bytes big-endian: each step shifts the accumulator
			// up by one byte and appends the next one.
			seedValue = 0
			for _, b := range entropy {
				seedValue = seedValue<<8 | int64(b)
			}
		}
		seedRandSource = rand.New(rand.NewSource(seedValue))
	})
}
// generateRandomIdentifier returns a 12-character string whose characters are
// picked from lowercase ASCII letters and digits using the package-level
// seedRandSource (initialised on first use via initSeedRand).
//
// NOTE(review): no lock is taken around seedRandSource here. A *rand.Rand
// created with rand.New is not safe for concurrent use, so confirm that this
// function is only ever called from a single goroutine.
func generateRandomIdentifier() string {
	const (
		alphabet = "abcdefghijklmnopqrstuvwxyz0123456789"
		idLen    = 12
	)

	initSeedRand()

	var buf [idLen]byte
	for pos := 0; pos < idLen; pos++ {
		buf[pos] = alphabet[seedRandSource.Intn(len(alphabet))]
	}
	return string(buf[:])
}
func HandleSeedCommand(cfg *config.Config, name string, args []string) error { func HandleSeedCommand(cfg *config.Config, name string, args []string) error {
fs := newFlagSet(name, printSeedUsage) fs := newFlagSet(name, printSeedUsage)
if err := parseCommand(fs, args, name); err != nil { if err := parseCommand(fs, args, name); err != nil {
@@ -360,44 +390,44 @@ func createRandomPosts(postRepo repositories.PostRepository, authorID uint, coun
} }
// generateRandomPath returns "/article/" followed by 5 to 24 random lowercase
// ASCII letters. The left column is the old crypto/rand implementation; the
// right column is the new one backed by the package-level seedRandSource.
func generateRandomPath() string { func generateRandomPath() string {
pathLength, _ := rand.Int(rand.Reader, big.NewInt(20)) initSeedRand()
// Intn(20) yields 0..19; the loop below adds 5, giving 5..24 letters.
pathLength := seedRandSource.Intn(20)
path := "/article/" path := "/article/"
for i := int64(0); i < pathLength.Int64()+5; i++ { for i := 0; i < pathLength+5; i++ {
randomChar, _ := rand.Int(rand.Reader, big.NewInt(26)) randomChar := seedRandSource.Intn(26)
path += string(rune('a' + randomChar.Int64())) path += string(rune('a' + randomChar))
} }
return path return path
} }
func createRandomVotes(voteRepo repositories.VoteRepository, users []database.User, posts []database.Post, avgVotesPerPost int) (int, error) { func createRandomVotes(voteRepo repositories.VoteRepository, users []database.User, posts []database.Post, avgVotesPerPost int) (int, error) {
initSeedRand()
totalVotes := 0 totalVotes := 0
for _, post := range posts { for _, post := range posts {
voteCount, _ := rand.Int(rand.Reader, big.NewInt(int64(avgVotesPerPost*2)+1)) numVotes := seedRandSource.Intn(avgVotesPerPost*2 + 1)
numVotes := int(voteCount.Int64())
if numVotes == 0 && avgVotesPerPost > 0 { if numVotes == 0 && avgVotesPerPost > 0 {
chance, _ := rand.Int(rand.Reader, big.NewInt(5)) if seedRandSource.Intn(5) > 0 {
if chance.Int64() > 0 {
numVotes = 1 numVotes = 1
} }
} }
usedUsers := make(map[uint]bool) usedUsers := make(map[uint]bool)
for i := 0; i < numVotes && len(usedUsers) < len(users); i++ { for i := 0; i < numVotes && len(usedUsers) < len(users); i++ {
userIdx, _ := rand.Int(rand.Reader, big.NewInt(int64(len(users)))) userIdx := seedRandSource.Intn(len(users))
user := users[userIdx.Int64()] user := users[userIdx]
if usedUsers[user.ID] { if usedUsers[user.ID] {
continue continue
} }
usedUsers[user.ID] = true usedUsers[user.ID] = true
voteTypeInt, _ := rand.Int(rand.Reader, big.NewInt(10)) voteTypeInt := seedRandSource.Intn(10)
var voteType database.VoteType var voteType database.VoteType
if voteTypeInt.Int64() < 7 { if voteTypeInt < 7 {
voteType = database.VoteUp voteType = database.VoteUp
} else { } else {
voteType = database.VoteDown voteType = database.VoteDown