From c2d4357262d1bc1c2410a25fe8d25bdfdc3efe6d Mon Sep 17 00:00:00 2001 From: Kharec Date: Tue, 11 Nov 2025 16:19:09 +0100 Subject: [PATCH] feat: keep only one method to get title of a page --- internal/services/url_metadata_service.go | 126 +--------------------- 1 file changed, 1 insertion(+), 125 deletions(-) diff --git a/internal/services/url_metadata_service.go b/internal/services/url_metadata_service.go index e8ea145..9d4436f 100644 --- a/internal/services/url_metadata_service.go +++ b/internal/services/url_metadata_service.go @@ -262,28 +262,7 @@ func (s *URLMetadataService) FetchTitle(ctx context.Context, rawURL string) (str } func (s *URLMetadataService) ExtractTitleFromHTML(html string) string { - - if title := s.ExtractFromTitleTag(html); title != "" { - return title - } - - if title := s.ExtractFromOpenGraph(html); title != "" { - return title - } - - if title := s.ExtractFromJSONLD(html); title != "" { - return title - } - - if title := s.ExtractFromTwitterCard(html); title != "" { - return title - } - - if title := s.extractFromMetaTags(html); title != "" { - return title - } - - return "" + return s.ExtractFromTitleTag(html) } func (s *URLMetadataService) ExtractFromTitleTag(htmlContent string) string { @@ -313,109 +292,6 @@ func (s *URLMetadataService) ExtractFromTitleTag(htmlContent string) string { } } -func (s *URLMetadataService) ExtractFromOpenGraph(htmlContent string) string { - - lines := strings.Split(htmlContent, "\n") - for _, line := range lines { - line = strings.TrimSpace(line) - if strings.Contains(strings.ToLower(line), `property="og:title"`) && strings.Contains(line, `content="`) { - start := strings.Index(line, `content="`) - if start != -1 { - start += 9 - end := strings.Index(line[start:], `"`) - if end != -1 { - title := line[start : start+end] - cleaned := s.optimizedTitleClean(title) - if cleaned != "" { - return cleaned - } - } - } - } - } - return "" -} - -func (s *URLMetadataService) ExtractFromJSONLD(htmlContent string) string { - - lines := strings.Split(htmlContent, "\n") - for _, line := range lines { - line = strings.TrimSpace(line) - if strings.Contains(line, `"@type":"VideoObject"`) || strings.Contains(line, `"@type":"WebPage"`) { - - if strings.Contains(line, `"name":`) { - start := strings.Index(line, `"name":`) - if start != -1 { - start += 7 - - for i := start; i < len(line); i++ { - if line[i] == '"' { - start = i + 1 - break - } - } - end := strings.Index(line[start:], `"`) - if end != -1 { - title := line[start : start+end] - cleaned := s.optimizedTitleClean(title) - if cleaned != "" { - return cleaned - } - } - } - } - } - } - return "" -} - -func (s *URLMetadataService) ExtractFromTwitterCard(htmlContent string) string { - - lines := strings.Split(htmlContent, "\n") - for _, line := range lines { - line = strings.TrimSpace(line) - if strings.Contains(strings.ToLower(line), `name="twitter:title"`) && strings.Contains(line, `content="`) { - start := strings.Index(line, `content="`) - if start != -1 { - start += 9 - end := strings.Index(line[start:], `"`) - if end != -1 { - title := line[start : start+end] - cleaned := s.optimizedTitleClean(title) - if cleaned != "" { - return cleaned - } - } - } - } - } - return "" -} - -func (s *URLMetadataService) extractFromMetaTags(htmlContent string) string { - - lines := strings.Split(htmlContent, "\n") - for _, line := range lines { - line = strings.TrimSpace(line) - - if strings.Contains(strings.ToLower(line), `name="title"`) && strings.Contains(line, `content="`) { - start := strings.Index(line, `content="`) - if start != -1 { - start += 9 - end := strings.Index(line[start:], `"`) - if end != -1 { - title := line[start : start+end] - cleaned := s.optimizedTitleClean(title) - if cleaned != "" { - return cleaned - } - } - } - } - } - return "" -} - func (s *URLMetadataService) optimizedTitleClean(title string) string { if title == "" { return ""