1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-11-23 00:14:19 +00:00
bda-6-steffo/unimore_bda_6/database/cache.py

82 lines
2.1 KiB
Python
Raw Normal View History

import typing as t
import logging
import shutil
import pathlib
import pickle
from .datatypes import Review
# Logger used by the functions in this module, named after the module itself.
log = logging.getLogger(__name__)
2023-02-08 18:46:05 +00:00
# Type of a zero-argument callable returning a generator that yields `Review` objects.
CachedDatasetFunc = t.Callable[[], t.Generator[Review, t.Any, None]]
def store_cache(reviews: t.Iterator[Review], path: str | pathlib.Path) -> None:
    """
    Store the contents of the given `Review` iterator to different files in a directory at the given path.

    Every item is pickled to ``{index}.pickle`` inside the directory, where ``index`` is the zero-based position of
    the item in the iterator.

    :param reviews: The items to store. The iterator is consumed entirely.
    :param path: The directory to create. Nothing may exist at this path yet; missing parents are created.
    :raises FileExistsError: If something already exists at ``path``.
    """
    path = pathlib.Path(path)

    # Let `mkdir` itself detect a pre-existing path: creating and checking in a single step leaves no window
    # between a separate existence check and the creation in which something else could claim the path.
    log.debug("Creating cache directory: %s", path)
    try:
        path.mkdir(parents=True, exist_ok=False)
    except FileExistsError as error:
        raise FileExistsError("Specified cache path already exists.") from error

    # Write the documents to path/{index}.pickle
    for index, document in enumerate(reviews):
        document_path = path.joinpath(f"{index}.pickle")
        log.debug("Storing pickle file: %s", document_path)
        with document_path.open("wb") as file:
            pickle.dump(document, file)
2023-02-08 18:46:05 +00:00
def load_cache(path: str | pathlib.Path) -> CachedDatasetFunc:
    """
    Load the contents of a directory into a `Review` iterator.

    The directory itself is only read when the returned function is called and its generator is iterated; each call
    scans the directory again.

    Files whose name does not end in ``.pickle`` are ignored. Files named ``{index}.pickle`` are yielded in ascending
    index order; any other ``.pickle`` files follow, sorted by name.

    :param path: The cache directory to read from.
    :returns: A function that, when called, returns a generator yielding the unpickled objects.
    :raises FileNotFoundError: If ``path`` does not exist at the time of this call.
    """
    path = pathlib.Path(path)

    if not path.exists():
        raise FileNotFoundError("The specified path does not exist.")

    def stored_order(document_path: pathlib.Path) -> tuple[int, int, str]:
        # Sort key: numeric file names first, by their integer value; everything else afterwards, by name.
        stem = document_path.stem
        if stem.isascii() and stem.isdigit():
            return 0, int(stem), stem
        return 1, 0, stem

    def data_cache_loader():
        pickle_paths = []
        for document_path in path.iterdir():
            if document_path.name.endswith(".pickle"):
                pickle_paths.append(document_path)
            else:
                log.debug("Ignoring non-pickle file: %s", document_path)

        # `iterdir` yields entries in arbitrary order, so sort explicitly to get a deterministic sequence.
        for document_path in sorted(pickle_paths, key=stored_order):
            log.debug("Loading pickle file: %s", document_path)
            # NOTE: unpickling can execute arbitrary code; only point this at cache directories that are trusted.
            with document_path.open("rb") as file:
                result: Review = pickle.load(file)
            # The file is closed before yielding, so no handle stays open while the consumer works on the item.
            yield result

    return data_cache_loader
2023-02-08 09:54:14 +00:00
def delete_cache(path: str | pathlib.Path) -> None:
    """
    Remove the cache directory found at the given path, together with everything inside it.

    :param path: The cache directory to delete.
    :raises FileNotFoundError: If nothing exists at ``path``.
    """
    cache_dir = pathlib.Path(path)

    if cache_dir.exists():
        log.debug("Deleting cache directory: %s", cache_dir)
        shutil.rmtree(cache_dir)
    else:
        raise FileNotFoundError("The specified path does not exist.")
# Names exported by a star-import of this module: the dataset function type and the three cache operations.
__all__ = (
    "CachedDatasetFunc",
    "store_cache",
    "load_cache",
    "delete_cache",
)