PDF Metadata

Set and retrieve PDF document metadata.

PDF Metadata

Set and retrieve metadata properties for PDF documents, including title, author, subject, keywords, and more.


Set Metadata

Add or update metadata on a PDF document.

POST /v1/pdf/metadata

Request Body

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| pdf | string | Yes* | Base64-encoded PDF content |
| url | string | Yes* | URL to fetch the PDF from |
| metadata | object | Yes | Metadata properties to set |

*Either pdf or url is required, not both.

Metadata Object

| Property | Type | Description |
| --- | --- | --- |
| title | string | Document title |
| author | string | Document author name |
| subject | string | Document subject or description |
| keywords | string | Comma-separated keywords |
| creator | string | Application that created the original content |
| producer | string | Application that produced the PDF |

Code Examples - Set Metadata

# Set metadata on a base64-encoded PDF and save the returned file locally.
curl --request POST https://api.pdfapi.dev/v1/pdf/metadata \
  --header "Authorization: Bearer sk_live_xxx" \
  --header "Content-Type: application/json" \
  --output report-with-metadata.pdf \
  --data '{
    "pdf": "JVBERi0xLjQKMSAwIG9iago8PAovVHlwZSAvQ2F0YWxvZwo...",
    "metadata": {
      "title": "Q4 2025 Financial Report",
      "author": "Finance Department",
      "subject": "Quarterly financial summary and projections",
      "keywords": "finance, quarterly, report, 2025, Q4",
      "creator": "Internal Reporting System",
      "producer": "PDF API"
    }
  }'
import fs from 'fs';

// Read the source PDF and encode it as base64 for the JSON request body.
const pdfBase64 = fs.readFileSync('document.pdf').toString('base64');

const response = await fetch('https://api.pdfapi.dev/v1/pdf/metadata', {
  method: 'POST',
  headers: {
    'Authorization': `Bearer ${process.env.PDFAPI_KEY}`,
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    pdf: pdfBase64,
    metadata: {
      title: 'Q4 2025 Financial Report',
      author: 'Finance Department',
      subject: 'Quarterly financial summary and projections',
      keywords: 'finance, quarterly, report, 2025, Q4',
    },
  }),
});

// Failed requests return a JSON error body instead of a PDF, so stop here
// rather than saving that payload under a .pdf filename.
if (!response.ok) {
  const detail = await response.text();
  throw new Error(`Set metadata failed (${response.status}): ${detail}`);
}

// A successful response body is the updated PDF itself.
const buffer = await response.arrayBuffer();
fs.writeFileSync('report-with-metadata.pdf', Buffer.from(buffer));
import base64
import os

import requests

# Load the source PDF and base64-encode it for the JSON payload
with open('document.pdf', 'rb') as source:
    encoded_pdf = base64.b64encode(source.read()).decode('utf-8')

request_headers = {
    'Authorization': f'Bearer {os.environ["PDFAPI_KEY"]}',
    'Content-Type': 'application/json',
}
request_body = {
    'pdf': encoded_pdf,
    'metadata': {
        'title': 'Q4 2025 Financial Report',
        'author': 'Finance Department',
        'subject': 'Quarterly financial summary and projections',
        'keywords': 'finance, quarterly, report, 2025, Q4',
    },
}

response = requests.post(
    'https://api.pdfapi.dev/v1/pdf/metadata',
    headers=request_headers,
    json=request_body,
)

# Only write the output file when the API reports success
if response.status_code == 200:
    with open('report-with-metadata.pdf', 'wb') as target:
        target.write(response.content)
package main

import (
    "bytes"
    "encoding/base64"
    "encoding/json"
    "fmt"
    "io"
    "log"
    "net/http"
    "os"
)

// setMetadata reads document.pdf, posts it with the new metadata to the
// set-metadata endpoint, and writes the returned PDF to
// report-with-metadata.pdf. It returns an error for any failed step,
// including a non-200 response from the API.
func setMetadata() error {
    pdfContent, err := os.ReadFile("document.pdf")
    if err != nil {
        return fmt.Errorf("read input PDF: %w", err)
    }
    pdfBase64 := base64.StdEncoding.EncodeToString(pdfContent)

    payload := map[string]interface{}{
        "pdf": pdfBase64,
        "metadata": map[string]string{
            "title":    "Q4 2025 Financial Report",
            "author":   "Finance Department",
            "subject":  "Quarterly financial summary",
            "keywords": "finance, quarterly, report",
        },
    }

    body, err := json.Marshal(payload)
    if err != nil {
        return fmt.Errorf("encode request body: %w", err)
    }

    req, err := http.NewRequest("POST", "https://api.pdfapi.dev/v1/pdf/metadata", bytes.NewBuffer(body))
    if err != nil {
        return fmt.Errorf("build request: %w", err)
    }
    req.Header.Set("Authorization", "Bearer "+os.Getenv("PDFAPI_KEY"))
    req.Header.Set("Content-Type", "application/json")

    client := &http.Client{}
    resp, err := client.Do(req)
    if err != nil {
        // resp is nil here, so it must not be touched (no deferred Close yet).
        return fmt.Errorf("send request: %w", err)
    }
    defer resp.Body.Close()

    pdf, err := io.ReadAll(resp.Body)
    if err != nil {
        return fmt.Errorf("read response body: %w", err)
    }

    // Error responses carry a JSON error body rather than a PDF; report it
    // instead of saving it under a .pdf filename.
    if resp.StatusCode != http.StatusOK {
        return fmt.Errorf("set metadata failed: %s: %s", resp.Status, pdf)
    }

    if err := os.WriteFile("report-with-metadata.pdf", pdf, 0644); err != nil {
        return fmt.Errorf("write output PDF: %w", err)
    }
    return nil
}

func main() {
    if err := setMetadata(); err != nil {
        log.Fatal(err)
    }
}

Get Metadata

Retrieve metadata from an existing PDF document.

POST /v1/pdf/metadata/get

Request Body

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| pdf | string | Yes* | Base64-encoded PDF content |
| url | string | Yes* | URL to fetch the PDF from |

*Either pdf or url is required, not both.


Code Examples - Get Metadata

# Fetch the metadata of a PDF that the API downloads from the given URL.
curl --request POST https://api.pdfapi.dev/v1/pdf/metadata/get \
  --header "Authorization: Bearer sk_live_xxx" \
  --header "Content-Type: application/json" \
  --data '{
    "url": "https://example.com/document.pdf"
  }'
const response = await fetch('https://api.pdfapi.dev/v1/pdf/metadata/get', {
  method: 'POST',
  headers: {
    'Authorization': `Bearer ${process.env.PDFAPI_KEY}`,
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    url: 'https://example.com/document.pdf',
  }),
});

// Error responses contain an `error` object instead of `data`; fail with the
// API's own message rather than a TypeError on `data.title` below.
if (!response.ok) {
  const detail = await response.text();
  throw new Error(`Get metadata failed (${response.status}): ${detail}`);
}

const { data } = await response.json();
console.log('Title:', data.title);
console.log('Author:', data.author);
console.log('Keywords:', data.keywords);
import os

import requests

endpoint = 'https://api.pdfapi.dev/v1/pdf/metadata/get'
request_headers = {
    'Authorization': f'Bearer {os.environ["PDFAPI_KEY"]}',
    'Content-Type': 'application/json',
}

response = requests.post(
    endpoint,
    headers=request_headers,
    json={'url': 'https://example.com/document.pdf'},
)

# Print a few fields only when the lookup succeeded
if response.status_code == 200:
    metadata = response.json()['data']
    for label, field in (
        ('Title', 'title'),
        ('Author', 'author'),
        ('Created', 'creation_date'),
    ):
        print(f"{label}: {metadata.get(field)}")

Response - Get Metadata

Success (200)

{
  "data": {
    "title": "Q4 2025 Financial Report",
    "author": "Finance Department",
    "subject": "Quarterly financial summary and projections",
    "keywords": "finance, quarterly, report, 2025, Q4",
    "creator": "Internal Reporting System",
    "producer": "PDF API",
    "creation_date": "2025-12-15T10:30:00Z",
    "modification_date": "2025-12-20T14:45:00Z",
    "page_count": 24,
    "pdf_version": "1.7"
  }
}

Response - Set Metadata

Success (200)

Returns the PDF file with updated metadata:

Content-Type: application/pdf
Content-Disposition: attachment; filename="document.pdf"

Error Responses

Missing PDF Source (400)

{
  "error": {
    "code": "VALIDATION_ERROR",
    "message": "Either 'pdf' or 'url' is required"
  }
}

Invalid PDF (400)

{
  "error": {
    "code": "INVALID_PDF",
    "message": "The provided content is not a valid PDF document"
  }
}

Empty Metadata (400)

{
  "error": {
    "code": "VALIDATION_ERROR",
    "message": "At least one metadata property is required"
  }
}

See Error Codes for the complete reference.


Use Cases

Document Cataloging

Add metadata to improve document organization and searchability:

/**
 * Attach catalog metadata to a PDF through the set-metadata endpoint.
 *
 * @param {string} pdfBase64 - Base64-encoded PDF content.
 * @param {object} documentInfo - Catalog record for the document.
 * @param {string} documentInfo.name - Sent as the PDF title.
 * @param {string} documentInfo.uploadedBy - Sent as the PDF author.
 * @param {string} documentInfo.category - Sent as the PDF subject.
 * @param {string[]} documentInfo.tags - Joined with ", " into the keywords field.
 * @returns {Promise<ArrayBuffer>} Bytes of the PDF returned by the API.
 * @throws {Error} If the API responds with a non-2xx status.
 */
async function catalogDocument(pdfBase64, documentInfo) {
  const response = await fetch('https://api.pdfapi.dev/v1/pdf/metadata', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.PDFAPI_KEY}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      pdf: pdfBase64,
      metadata: {
        title: documentInfo.name,
        author: documentInfo.uploadedBy,
        subject: documentInfo.category,
        keywords: documentInfo.tags.join(', '),
      },
    }),
  });

  // A failed request returns a JSON error body, not a PDF; surface it instead
  // of handing the caller error bytes that look like a document.
  if (!response.ok) {
    const detail = await response.text();
    throw new Error(`Set metadata failed (${response.status}): ${detail}`);
  }

  return response.arrayBuffer();
}

Compliance and Auditing

Extract metadata for compliance verification:

def verify_document_metadata(pdf_url, timeout=30):
    """Fetch a PDF's metadata and report whether the required fields are set.

    Args:
        pdf_url: URL of the PDF, passed to the API as the ``url`` parameter.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        A dict with ``compliant`` (True when nothing is missing),
        ``missing_fields`` (required fields that are absent or empty) and
        ``metadata`` (the ``data`` object returned by the API).

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    response = requests.post(
        'https://api.pdfapi.dev/v1/pdf/metadata/get',
        headers={'Authorization': f'Bearer {os.environ["PDFAPI_KEY"]}'},
        json={'url': pdf_url},
        timeout=timeout,
    )
    # Error responses have no 'data' key; raise the HTTP error itself instead
    # of letting the lookup below fail with an unrelated KeyError.
    response.raise_for_status()

    metadata = response.json()['data']

    # A field counts as missing when it is absent or has an empty value
    required = ['title', 'author', 'creation_date']
    missing = [field for field in required if not metadata.get(field)]

    return {
        'compliant': not missing,
        'missing_fields': missing,
        'metadata': metadata,
    }

Notes

  • Setting metadata does not modify the PDF content, only the document properties
  • Existing metadata is preserved unless explicitly overwritten
  • The creation_date is automatically set when a PDF is first created
  • The modification_date is updated when metadata is changed
  • Maximum length for any metadata field is 1024 characters
  • Keywords should be comma-separated for best compatibility