Skip to main content
Register cloud storage providers on a Knowledge instance with content_sources. Each provider has .file() and .folder() methods that create content references you pass to knowledge.insert().
from agno.knowledge.knowledge import Knowledge
from agno.knowledge.remote_content import S3Config

knowledge = Knowledge(
    vector_db=vector_db,
    contents_db=contents_db,
    content_sources=[
        S3Config(
            id="company-docs",
            name="Company Documents",
            bucket_name="my-docs-bucket",
            region="us-east-1",
        ),
    ],
)

# Insert a single file
knowledge.insert(
    name="Q4 Report",
    remote_content=knowledge.content_sources[0].file("reports/q4-2025.pdf"),
)

# Insert an entire folder
knowledge.insert(
    name="Engineering Specs",
    remote_content=knowledge.content_sources[0].folder("specs/"),
)

Supported Providers

| Provider | Config Class | Install |
| --- | --- | --- |
| Amazon S3 | S3Config | pip install boto3 |
| Google Cloud Storage | GcsConfig | pip install google-cloud-storage |
| SharePoint | SharePointConfig | pip install msal requests |
| GitHub | GitHubConfig | pip install requests |
| Azure Blob Storage | AzureBlobConfig | pip install azure-identity azure-storage-blob |
All configs are importable from agno.knowledge.remote_content.

Provider Configuration

S3Config

from agno.knowledge.remote_content import S3Config

s3 = S3Config(
    id="s3-docs",
    name="S3 Documents",
    bucket_name="my-bucket",
    region="us-east-1",
    aws_access_key_id="...",       # optional, falls back to default credential chain
    aws_secret_access_key="...",   # optional, falls back to default credential chain
    prefix="documents/",           # optional, default prefix for browsing
)
| Field | Type | Default | Description |
| --- | --- | --- | --- |
| id | str | required | Unique identifier for this source |
| name | str | required | Display name |
| bucket_name | str | required | S3 bucket name |
| region | Optional[str] | None | AWS region |
| aws_access_key_id | Optional[str] | None | AWS access key. Falls back to default credential chain. |
| aws_secret_access_key | Optional[str] | None | AWS secret key. Falls back to default credential chain. |
| prefix | Optional[str] | None | Default prefix for browsing and listing |

GcsConfig

from agno.knowledge.remote_content import GcsConfig

gcs = GcsConfig(
    id="gcs-docs",
    name="GCS Documents",
    bucket_name="my-gcs-bucket",
    project="my-gcp-project",
)
| Field | Type | Default | Description |
| --- | --- | --- | --- |
| id | str | required | Unique identifier |
| name | str | required | Display name |
| bucket_name | str | required | GCS bucket name |
| project | Optional[str] | None | GCP project ID |
| credentials_path | Optional[str] | None | Path to GCP credentials file |
| prefix | Optional[str] | None | Default prefix |

GitHubConfig

from agno.knowledge.remote_content import GitHubConfig

github = GitHubConfig(
    id="my-repo",
    name="My Repository",
    repo="owner/repo",
    token="ghp_...",
    branch="main",
)
| Field | Type | Default | Description |
| --- | --- | --- | --- |
| id | str | required | Unique identifier |
| name | str | required | Display name |
| repo | str | required | Repository in owner/repo format |
| token | Optional[str] | None | GitHub personal access token (needs Contents: read) |
| branch | Optional[str] | None | Branch name |
| path | Optional[str] | None | Default path filter |

SharePointConfig

from agno.knowledge.remote_content import SharePointConfig

sharepoint = SharePointConfig(
    id="sharepoint-docs",
    name="SharePoint Documents",
    tenant_id="...",
    client_id="...",
    client_secret="...",
    hostname="contoso.sharepoint.com",
    site_path="/sites/Engineering",
)
| Field | Type | Default | Description |
| --- | --- | --- | --- |
| id | str | required | Unique identifier |
| name | str | required | Display name |
| tenant_id | str | required | Azure AD tenant ID |
| client_id | str | required | Azure AD application client ID |
| client_secret | str | required | Azure AD application client secret |
| hostname | str | required | SharePoint hostname |
| site_path | Optional[str] | None | Site path (e.g., /sites/Engineering) |
| site_id | Optional[str] | None | Full site ID |
| folder_path | Optional[str] | None | Default folder path |

AzureBlobConfig

from agno.knowledge.remote_content import AzureBlobConfig

azure = AzureBlobConfig(
    id="azure-docs",
    name="Azure Blob Documents",
    tenant_id="...",
    client_id="...",
    client_secret="...",
    storage_account="mystorageaccount",
    container="documents",
)
| Field | Type | Default | Description |
| --- | --- | --- | --- |
| id | str | required | Unique identifier |
| name | str | required | Display name |
| tenant_id | str | required | Azure AD tenant ID |
| client_id | str | required | Azure AD application client ID |
| client_secret | str | required | Azure AD application client secret |
| storage_account | str | required | Azure storage account name |
| container | str | required | Blob container name |
| prefix | Optional[str] | None | Default prefix |
The Azure AD application requires the Storage Blob Data Reader (or Storage Blob Data Contributor) role on the storage account.

Inserting Content

Each config has .file() and .folder() methods that return content references for knowledge.insert().
# Single file
knowledge.insert(
    name="Architecture Doc",
    remote_content=s3.file("docs/architecture.pdf"),
)

# Entire folder
knowledge.insert(
    name="All Specs",
    remote_content=gcs.folder("specs/"),
)

# GitHub file from a specific branch
knowledge.insert(
    name="README",
    remote_content=github.file("README.md", branch="develop"),
)

# SharePoint file from a specific site
knowledge.insert(
    name="Policy",
    remote_content=sharepoint.file("Shared Documents/policy.pdf", site_path="/sites/HR"),
)

Browsing S3 Files

S3Config supports paginated file listing with list_files(). This is useful for building file pickers or exploring bucket contents before ingesting.
result = s3.list_files(prefix="reports/", limit=50, page=1)

for folder in result.folders:
    print(f"Folder: {folder['name']}")

for file in result.files:
    print(f"File: {file['name']} ({file['size']} bytes)")

print(f"Page {result.page} of {result.total_pages}")
| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| prefix | Optional[str] | None | Path prefix filter. Overrides the config’s prefix. |
| delimiter | str | "/" | Folder delimiter |
| limit | int | 100 | Files per page (1-1000) |
| page | int | 1 | Page number (1-indexed) |
An async variant alist_files() is also available with the same signature.

Multiple Sources

Register multiple providers on a single Knowledge instance.
knowledge = Knowledge(
    vector_db=vector_db,
    contents_db=contents_db,
    content_sources=[s3, gcs, github, sharepoint, azure],
)

# Insert from different sources
knowledge.insert(name="S3 Doc", remote_content=s3.file("doc.pdf"))
knowledge.insert(name="GitHub Doc", remote_content=github.file("README.md"))
When running with AgentOS, registered sources are exposed via the /knowledge/{id}/sources API endpoint for listing and browsing.

Next Steps

| Task | Guide |
| --- | --- |
| Content types overview | Content Types |
| Filter search results | Filtering |
| Set up a vector database | Vector Databases |