Datalab provides file storage for documents you want to process with pipelines or reuse across multiple API calls. Uploaded files get a reference URL (datalab://file-xxx) that you can use in pipelines.
# Page through all files.
# Assumes `client` is an already-constructed DatalabClient.
offset = 0
limit = 50

while True:
    # Fetch one page of at most `limit` entries, starting at `offset`.
    result = client.list_files(limit=limit, offset=offset)

    for file in result['files']:
        print(file.original_filename)

    # Stop once the current page reaches or passes the reported total.
    if offset + limit >= result['total']:
        break

    offset += limit
# Look up a single file's metadata.
# Assumes `client` is an already-constructed DatalabClient.

# By file ID (integer)
file = client.get_file_metadata(123)

# By hashid (string from reference URL)
file = client.get_file_metadata("abc123")

# Print the metadata fields exposed on the returned object.
print(f"Filename: {file.original_filename}")
print(f"Size: {file.file_size} bytes")
print(f"Type: {file.content_type}")
print(f"Created: {file.created}")
from pathlib import Path

from datalab_sdk import DatalabClient

client = DatalabClient()

# Find all PDFs in a directory
pdf_files = list(Path("./documents").glob("*.pdf"))

# Upload all files (paths are passed as plain strings)
uploaded = client.upload_files([str(p) for p in pdf_files])

print(f"Uploaded {len(uploaded)} files:")
for file in uploaded:
    print(f" {file.original_filename}: {file.reference}")

# Store references for later use, keyed by original filename
references = {f.original_filename: f.reference for f in uploaded}