FileCache.java
package jasper.component;
import io.github.resilience4j.bulkhead.annotation.Bulkhead;
import io.micrometer.core.annotation.Timed;
import jasper.domain.Ref;
import jasper.errors.NotFoundException;
import jasper.errors.ScrapeProtocolException;
import jasper.plugin.Cache;
import jasper.repository.RefRepository;
import jasper.repository.filter.RefFilter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Profile;
import org.springframework.stereotype.Component;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import static jasper.domain.proj.HasTags.hasMatchingTag;
import static jasper.plugin.Cache.bannedOrBroken;
import static jasper.plugin.Cache.getCache;
import static jasper.plugin.Pull.getPull;
import static jasper.util.Logging.getMessage;
import static org.apache.commons.io.IOUtils.closeQuietly;
import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.apache.commons.lang3.StringUtils.isNotBlank;
@Profile("file-cache")
@Component
public class FileCache {
private static final Logger logger = LoggerFactory.getLogger(FileCache.class);
static final String CACHE = "cache";
@Autowired
ConfigCache configs;
@Autowired
RefRepository refRepository;
@Autowired
Storage storage;
@Autowired
Fetch fetch;
@Autowired
Images images;
@Autowired
Tagger tagger;
@Timed(value = "jasper.cache")
@Bulkhead(name = "recycler")
public void clearDeleted(String origin) {
logger.info("{} Purging file cache", origin);
var start = Instant.now();
storage.visitStorage(origin, CACHE, id -> {
if (!refRepository.cacheExists(id)) {
try {
storage.delete(origin, CACHE, id);
} catch (IOException e) {
logger.error("Cannot delete file", e);
}
}
});
logger.info("{} Finished purging file cache in {}", origin, Duration.between(start, Instant.now()));
}
@Timed(value = "jasper.cache", histogram = true)
public void preFetch(String url, String origin, boolean thumbnail) {
if (exists(url, origin)) return;
if (thumbnail) {
fetchThumbnail(url, origin);
} else {
fetch(url, origin, true);
}
}
@Timed(value = "jasper.cache", histogram = true)
public void refresh(String url, String origin) {
fetch(url, origin, true);
}
private boolean exists(String url, String origin) {
return refRepository.exists(RefFilter.builder()
.url(url)
.origin(origin)
.query("_plugin/cache:!_plugin/delta/cache")
.build().spec());
}
@Timed(value = "jasper.cache")
public String fetchString(String url, String origin) {
var is = fetch(url, origin);
if (is == null) return null;
try (is) {
var cache = cache(url, origin);
if (cache == null) return null;
if (bannedOrBroken(cache)) return null;
return new String(is.readAllBytes());
} catch (IOException e) {
return null;
}
}
@Timed(value = "jasper.cache")
public InputStream fetch(String url, String origin) {
return fetch(url, origin, false);
}
@Timed(value = "jasper.cache")
public InputStream fetch(String url, String origin, boolean refresh) {
var existingCache = cache(url, origin);
if (bannedOrBroken(existingCache, refresh)) return null;
if (!refresh && existingCache != null && !existingCache.isNoStore() && storage.exists(origin, CACHE, existingCache.getId())) {
return storage.stream(origin, CACHE, existingCache.getId());
}
if (url.startsWith("cache:")) {
var id = url.substring("cache:".length());
if (storage.exists(origin, CACHE, id)) {
return storage.stream(origin, CACHE, id);
} else if (configs.getRemote(origin) == null) {
// No cache found and no remote to fetch from
return null;
}
}
if (hasMatchingTag(stat(url, origin), "+plugin/error")) return null;
String mimeType;
String id;
try (var res = fetch.doScrape(url, origin)) {
if (res == null) return null;
var remote = configs.getRemote(origin);
var pull = getPull(remote);
if (remote != null && (url.startsWith("cache:") || pull.isCacheProxy())) {
if (url.startsWith("cache:")) {
id = url.substring("cache:".length());
} else {
id = cache(url, origin).getId();
}
if (storage.exists(origin, CACHE, id)) return storage.stream(origin, CACHE, id);
return null;
}
mimeType = res.getMimeType();
try (var is = res.getInputStream()) {
if (existingCache != null && isNotBlank(existingCache.getId()) && !storage.exists(origin, CACHE, existingCache.getId())) {
id = existingCache.getId();
storage.storeAt(origin, CACHE, id, is);
} else {
id = storage.store(origin, CACHE, is);
}
}
var cache = Cache.builder()
.id(id)
.mimeType(mimeType)
.contentLength(storage.size(origin, CACHE, id))
.build();
tagger.plugin(url, origin, "_plugin/cache", cache, "-_plugin/delta/cache");
return storage.stream(origin, CACHE, id);
} catch (ScrapeProtocolException e) {
throw e;
} catch (Exception e) {
logger.error("{} Error Fetching {}", origin, url);
var err = tagger.plugin(url, origin, "_plugin/cache", null, "-_plugin/delta/cache");
tagger.attachError(origin, err,
"Error Fetching: " + getMessage(e));
if (configs.getRemote(origin) != null) {
var cache = existingCache != null ? existingCache : Cache.builder().build();
cache.setBan(true);
tagger.plugin(url, origin, "_plugin/cache", cache);
}
return null;
} finally {
for (var other : createArchive(url, origin, cache(url, origin))) cacheLater(other, origin);
}
}
private Ref stat(String url, String origin) {
return refRepository.findOneByUrlAndOrigin(url, origin).orElse(null);
}
private Cache cache(String url, String origin) {
return getCache(stat(url, origin));
}
@Timed(value = "jasper.cache")
public InputStream fetchThumbnail(String url, String origin) {
var id = "";
if (url.startsWith("cache:")) {
id = url.substring("cache:".length());
}
closeQuietly(fetch(url, origin));
var fullSize = cache(url, origin);
if (fullSize == null) {
if (configs.getRemote(origin) == null) return null;
} else {
id = fullSize.getId();
}
if (isBlank(id)) return null;
if (bannedOrBroken(fullSize)) return null;
if (fullSize != null && fullSize.isThumbnail()) return fetch(url, origin);
var thumbnailId = "t_" + id;
var thumbnailUrl = "cache:" + thumbnailId;
var existingCache = cache(thumbnailUrl, origin);
if (existingCache != null && isBlank(existingCache.getId())) {
// If id is blank the last thumbnail generation must have failed
// Wait for the user to manually refresh
return null;
}
if (storage.exists(origin, CACHE, thumbnailId)) {
return fetch(thumbnailUrl, origin);
} else {
var is = fetch(url, origin);
if (is == null) return null;
var data = images.thumbnail(is);
if (data == null) {
// Returning null means the full size image is already small enough to be a thumbnail
// Set this as a thumbnail to disable future attempts
if (fullSize != null) fullSize.setThumbnail(true);
tagger.plugin(url, origin, "_plugin/cache", fullSize, "-_plugin/delta/cache");
return storage.stream(origin, CACHE, id);
}
try {
if (storage.exists(origin, CACHE, thumbnailId)) {
storage.delete(origin, CACHE, thumbnailId);
}
storage.storeAt(origin, CACHE, thumbnailId, data);
var cache = Cache.builder()
.id(thumbnailId)
.thumbnail(true)
.mimeType("image/png")
.contentLength((long) data.length)
.build();
tagger.plugin(thumbnailUrl, origin, "_plugin/cache", cache, "plugin/thumbnail");
return new ByteArrayInputStream(data);
} catch (Exception e) {
var err = tagger.plugin(thumbnailUrl, origin, "_plugin/cache", Cache.builder().thumbnail(true).build());
tagger.attachError(origin, err, "Error creating thumbnail", getMessage(e));
if (configs.getRemote(origin) != null) {
var cache = existingCache != null ? existingCache : Cache.builder().build();
cache.setBan(true);
tagger.plugin(url, origin, "_plugin/cache", cache);
}
return null;
}
}
}
@Timed(value = "jasper.cache")
public Ref save(String origin, String title, InputStream in, String mimeType, String ...tags) throws IOException {
var id = storage.store(origin, CACHE, in);
var cache = Cache.builder()
.id(id)
.mimeType(mimeType)
.contentLength(storage.size(origin, CACHE, id))
.build();
return tagger.newPlugin("cache:" + id, title, origin, "_plugin/cache", cache, tags);
}
@Timed(value = "jasper.cache")
public void overwrite(String url, String origin, byte[] bytes) throws IOException {
var cache = cache(url, origin);
if (cache == null) throw new NotFoundException("Overwriting cache that does not exist");
storage.overwrite(origin, CACHE, cache.getId(), bytes);
}
@Timed(value = "jasper.cache")
public String overwrite(String url, String origin, InputStream in, String mimeType) throws IOException {
var id = storage.store(origin, CACHE, in);
var cache = Cache.builder()
.id(id)
.mimeType(mimeType)
.contentLength(storage.size(origin, CACHE, id))
.build();
tagger.silentPlugin(url, "", origin, "_plugin/cache", cache);
return id;
}
@Timed(value = "jasper.cache")
public void push(String url, String origin, InputStream in) throws IOException {
if (!url.startsWith("cache:")) throw new NotFoundException("URL is not cacheable");
var id = url.substring("cache:".length());
if (id.matches(".*\\W")) throw new NotFoundException("URL is not cacheable");
storage.storeAt(origin, CACHE, id, in);
}
@Timed(value = "jasper.cache")
public void push(String url, String origin, byte[] data) throws IOException {
if (!url.startsWith("cache:")) throw new NotFoundException("URL is not cacheable");
var id = url.substring("cache:".length());
if (id.matches(".*\\W")) throw new NotFoundException("URL is not cacheable");
if (storage.exists(origin, CACHE, id)) {
storage.overwrite(origin, CACHE, id, data);
} else {
storage.storeAt(origin, CACHE, id, data);
}
}
@Timed(value = "jasper.cache")
public boolean cacheExists(String url, String origin) {
if (!url.startsWith("cache:")) throw new NotFoundException("URL is not cacheable");
return storage.exists(origin, CACHE, url.substring("cache:".length()));
}
private String fetchExistingString(String url, String origin) {
var ref = stat(url, origin);
var cache = getCache(ref);
if (cache == null) return null;
if (bannedOrBroken(cache)) return null;
return new String(storage.get(origin, CACHE, cache.getId()));
}
private List<String> createArchive(String url, String origin, Cache cache) {
var moreScrape = new ArrayList<String>();
if (cache == null || isBlank(cache.getId())) return moreScrape;
// M3U8 Manifest
var data = fetchExistingString(url, origin);
if (data == null) return moreScrape;
try {
var urlObj = URI.create(url).toURL();
if (data.trim().startsWith("#") && (urlObj.getPath().endsWith(".m3u8") || cache.getMimeType().equalsIgnoreCase("application/x-mpegURL") || cache.getMimeType().equalsIgnoreCase("application/vnd.apple.mpegurl"))) {
var hostPath = urlObj.getProtocol() + "://" + urlObj.getHost() + Path.of(urlObj.getPath()).getParent().toString();
// TODO: Set archive base URL
var basePath = isNotBlank(origin) ? "/api/v1/proxy?origin=" + origin + "&url=" : "/api/v1/proxy?url=";
var buffer = new StringBuilder();
for (var line : data.split("\n")) {
if (line.startsWith("#")) {
buffer.append(line).append("\n");
} else {
if (!line.startsWith("http") && !line.startsWith("#")) {
line = hostPath + "/" + line;
}
moreScrape.add(line);
buffer.append(basePath).append(URLEncoder.encode(line, StandardCharsets.UTF_8)).append("\n");
}
}
overwrite(url, origin, buffer.toString().getBytes());
}
} catch (Exception e) {}
return moreScrape;
}
private void cacheLater(String url, String origin) {
if (isBlank(url)) return;
url = fixUrl(url);
var ref = stat(url, origin);
if (ref != null && (ref.hasTag("_plugin/cache") || ref.hasTag("_plugin/delta/cache"))) return;
tagger.internalTag(url, origin, "_plugin/delta/cache");
}
private String fixUrl(String url) {
// TODO: Add plugin to override like oembeds
// return url.replaceAll("%20", "+");
return url.replaceAll(" ", "%20");
}
}