// DeltaScrape.java

package jasper.component.script;

import jakarta.persistence.EntityManager;
import jasper.component.Ingest;
import jasper.component.Scraper;
import jasper.component.Tagger;
import jasper.domain.Ref;
import jasper.errors.ModifiedException;
import jasper.errors.NotFoundException;
import jasper.repository.RefRepository;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Profile;
import org.springframework.stereotype.Component;

import static jasper.domain.proj.HasOrigin.origin;
import static jasper.domain.proj.Tag.matchesTag;
import static jasper.util.Logging.getMessage;

/**
 * Script component that scrapes the URL of a Ref and copies selected scraped
 * fields back onto that Ref.
 *
 * <p>Only registered when the {@code proxy} Spring profile is active.
 *
 * <p>Which fields are copied is driven by the Ref's expanded tags:
 * {@code _plugin/delta/scrape/ref} selects every field, while
 * {@code _plugin/delta/scrape/title}, {@code .../comment}, {@code .../sources},
 * {@code .../alts}, {@code .../plugins}, {@code .../tags} and
 * {@code .../published} each select a single field.
 */
@Profile("proxy")
@Component
public class DeltaScrape {
	private static final Logger logger = LoggerFactory.getLogger(DeltaScrape.class);

	@Autowired
	Scraper scraper;

	@Autowired
	Ingest ingest;

	@Autowired
	Tagger tagger;

	@Autowired
	RefRepository refRepository;

	@Autowired
	EntityManager em;

	/**
	 * Scrapes {@code ref}'s URL and updates the stored Ref with the scraped
	 * fields selected by its {@code _plugin/delta/scrape/*} tags.
	 *
	 * <p>This method does not propagate exceptions thrown inside its
	 * {@code try} block: a {@link ModifiedException} is ignored, and any other
	 * exception is logged and attached to the Ref through
	 * {@code tagger.attachError}.
	 *
	 * @param ref the Ref to scrape; its URL and origin identify both the page
	 *            to scrape and the stored Ref to update
	 */
	public void runScript(Ref ref) {
		try {
			logger.info("{} Scraping {}", ref.getOrigin(), ref.getUrl());
			// Capture the selector tags up front: the Ref is re-fetched below
			// and the scraped tags may replace the current ones.
			var tags = ref.getExpandedTags();
			// NOTE(review): the leading '-' presumably asks the tagger to remove
			// the trigger tag from the stored Ref - confirm against Tagger.
			tagger.tag(ref.getUrl(), ref.getOrigin(), "-_plugin/delta/scrape");
			var web = scraper.web(ref.getUrl(), ref.getOrigin());
			// Fetch Ref again in case scrape modified it
			ref = fetch(ref.getUrl(), ref.getOrigin());
			// Detach from the persistence context before mutating, so the
			// changes below are only submitted through ingest.update.
			em.detach(ref);
			var scrapeAll = tags.stream().anyMatch(t -> matchesTag("_plugin/delta/scrape/ref", t));
			if (scrapeAll || tags.stream().anyMatch(t -> matchesTag("_plugin/delta/scrape/title", t))) {
				ref.setTitle(web.getTitle());
			}
			if (scrapeAll || tags.stream().anyMatch(t -> matchesTag("_plugin/delta/scrape/comment", t))) {
				ref.setComment(web.getComment());
			}
			if (scrapeAll || tags.stream().anyMatch(t -> matchesTag("_plugin/delta/scrape/sources", t))) {
				ref.setSources(web.getSources());
			}
			if (scrapeAll || tags.stream().anyMatch(t -> matchesTag("_plugin/delta/scrape/alts", t))) {
				ref.setAlternateUrls(web.getAlternateUrls());
			}
			if (scrapeAll || tags.stream().anyMatch(t -> matchesTag("_plugin/delta/scrape/plugins", t))) {
				ref.setPlugins(web.getPlugins());
			}
			if (scrapeAll || tags.stream().anyMatch(t -> matchesTag("_plugin/delta/scrape/tags", t))) {
				ref.setTags(web.getTags());
			}
			if (scrapeAll || tags.stream().anyMatch(t -> matchesTag("_plugin/delta/scrape/published", t))) {
				ref.setPublished(web.getPublished());
			}
			// Scraped tags may have replaced the tag list, so make sure the
			// trigger tag is not present on the Ref being written back.
			ref.removeTag("_plugin/delta/scrape");
			ingest.update(ref.getOrigin(), ref);
		} catch (ModifiedException ignored) {
			// Deliberately ignored: the scrape result is dropped.
			// NOTE(review): presumably raised when the Ref was changed
			// concurrently while scraping - confirm against Ingest.update.
			logger.debug("{} Ignoring ModifiedException scraping Ref {}", ref.getOrigin(), ref.getUrl());
		} catch (Exception e) {
			// Pass the exception as the trailing argument so the stack trace is logged.
			logger.warn("{} Unexpected error scraping Ref {}", ref.getOrigin(), ref.getUrl(), e);
			tagger.attachError(ref.getUrl(), ref.getOrigin(), "Error Fetching for _plugin/delta/scrape", getMessage(e));
		}
	}

	/**
	 * Loads the Ref stored for the given URL and origin.
	 *
	 * @param url    URL of the Ref to load
	 * @param origin origin of the Ref; passed through {@code origin(...)}
	 *               before querying the repository
	 * @return the stored Ref
	 * @throws NotFoundException if the repository has no Ref for that URL and origin
	 */
	private Ref fetch(String url, String origin) {
		return refRepository.findOneByUrlAndOrigin(url, origin(origin))
			.orElseThrow(() -> new NotFoundException("Async"));
	}
}