PDF Metadata
Set and retrieve PDF document metadata.
PDF Metadata
Set and retrieve metadata properties for PDF documents, including title, author, subject, keywords, and more.
Set Metadata
Add or update metadata on a PDF document.
POST
/v1/pdf/metadata
Request Body
| Parameter | Type | Required | Description |
|---|---|---|---|
| pdf | string | Yes* | Base64-encoded PDF content |
| url | string | Yes* | URL to fetch the PDF from |
| metadata | object | Yes | Metadata properties to set |
*Either pdf or url is required, not both.
Metadata Object
| Property | Type | Description |
|---|---|---|
| title | string | Document title |
| author | string | Document author name |
| subject | string | Document subject or description |
| keywords | string | Comma-separated keywords |
| creator | string | Application that created the original content |
| producer | string | Application that produced the PDF |
Code Examples - Set Metadata
curl -X POST https://api.pdfapi.dev/v1/pdf/metadata \
-H "Authorization: Bearer sk_live_xxx" \
-H "Content-Type: application/json" \
-d '{
"pdf": "JVBERi0xLjQKMSAwIG9iago8PAovVHlwZSAvQ2F0YWxvZwo...",
"metadata": {
"title": "Q4 2025 Financial Report",
"author": "Finance Department",
"subject": "Quarterly financial summary and projections",
"keywords": "finance, quarterly, report, 2025, Q4",
"creator": "Internal Reporting System",
"producer": "PDF API"
}
}' \
--output report-with-metadata.pdf
import fs from 'fs';
// Read the PDF from disk and encode it as base64 for the JSON request body.
const pdfBuffer = fs.readFileSync('document.pdf');
const pdfBase64 = pdfBuffer.toString('base64');

const response = await fetch('https://api.pdfapi.dev/v1/pdf/metadata', {
  method: 'POST',
  headers: {
    'Authorization': `Bearer ${process.env.PDFAPI_KEY}`,
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    pdf: pdfBase64,
    metadata: {
      title: 'Q4 2025 Financial Report',
      author: 'Finance Department',
      subject: 'Quarterly financial summary and projections',
      keywords: 'finance, quarterly, report, 2025, Q4',
    },
  }),
});

// Error responses carry a JSON error body rather than a PDF; stop here so
// that body is never written to disk under a .pdf name.
if (!response.ok) {
  throw new Error(`Set metadata failed: ${response.status} ${await response.text()}`);
}

// On success the body is the PDF with updated metadata.
const buffer = await response.arrayBuffer();
fs.writeFileSync('report-with-metadata.pdf', Buffer.from(buffer));
import requests
import base64
import os
# Read the PDF from disk and encode it as base64 for the JSON request body.
with open('document.pdf', 'rb') as f:
    pdf_base64 = base64.b64encode(f.read()).decode('utf-8')

response = requests.post(
    'https://api.pdfapi.dev/v1/pdf/metadata',
    headers={
        'Authorization': f'Bearer {os.environ["PDFAPI_KEY"]}',
        'Content-Type': 'application/json',
    },
    json={
        'pdf': pdf_base64,
        'metadata': {
            'title': 'Q4 2025 Financial Report',
            'author': 'Finance Department',
            'subject': 'Quarterly financial summary and projections',
            'keywords': 'finance, quarterly, report, 2025, Q4',
        },
    },
)

# Raise on 4xx/5xx instead of silently skipping the write, so a failed
# request is visible to the caller.
response.raise_for_status()

# On success the body is the PDF with updated metadata.
with open('report-with-metadata.pdf', 'wb') as f:
    f.write(response.content)
package main
import (
	"bytes"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
)
// main sets metadata on document.pdf through the PDF API and writes the
// returned file to report-with-metadata.pdf. Any failure is printed to
// stderr and the process exits with status 1.
func main() {
	if err := setMetadata(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// setMetadata performs the request/response cycle and returns the first
// error encountered, so that deferred cleanup runs before main exits.
func setMetadata() error {
	pdfContent, err := os.ReadFile("document.pdf")
	if err != nil {
		return fmt.Errorf("read input PDF: %w", err)
	}
	pdfBase64 := base64.StdEncoding.EncodeToString(pdfContent)

	payload := map[string]interface{}{
		"pdf": pdfBase64,
		"metadata": map[string]string{
			"title":    "Q4 2025 Financial Report",
			"author":   "Finance Department",
			"subject":  "Quarterly financial summary",
			"keywords": "finance, quarterly, report",
		},
	}
	body, err := json.Marshal(payload)
	if err != nil {
		return fmt.Errorf("encode request body: %w", err)
	}

	req, err := http.NewRequest("POST", "https://api.pdfapi.dev/v1/pdf/metadata", bytes.NewBuffer(body))
	if err != nil {
		return fmt.Errorf("build request: %w", err)
	}
	req.Header.Set("Authorization", "Bearer "+os.Getenv("PDFAPI_KEY"))
	req.Header.Set("Content-Type", "application/json")

	client := &http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		// resp is nil on error, so it must not be touched (or closed) here.
		return fmt.Errorf("send request: %w", err)
	}
	defer resp.Body.Close()

	respBody, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("read response: %w", err)
	}

	// Error responses carry a JSON error body, not a PDF; report it instead
	// of writing it to the output file.
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("set metadata: unexpected status %s: %s", resp.Status, respBody)
	}

	if err := os.WriteFile("report-with-metadata.pdf", respBody, 0644); err != nil {
		return fmt.Errorf("write output PDF: %w", err)
	}
	return nil
}
Get Metadata
Retrieve metadata from an existing PDF document.
POST
/v1/pdf/metadata/get
Request Body
| Parameter | Type | Required | Description |
|---|---|---|---|
| pdf | string | Yes* | Base64-encoded PDF content |
| url | string | Yes* | URL to fetch the PDF from |
*Either pdf or url is required, not both.
Code Examples - Get Metadata
curl -X POST https://api.pdfapi.dev/v1/pdf/metadata/get \
-H "Authorization: Bearer sk_live_xxx" \
-H "Content-Type: application/json" \
-d '{
"url": "https://example.com/document.pdf"
}'
const response = await fetch('https://api.pdfapi.dev/v1/pdf/metadata/get', {
  method: 'POST',
  headers: {
    'Authorization': `Bearer ${process.env.PDFAPI_KEY}`,
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    url: 'https://example.com/document.pdf',
  }),
});

// Error bodies are shaped { error: ... } with no `data` key, so reading
// data.title below would fail with an unhelpful TypeError. Fail explicitly.
if (!response.ok) {
  throw new Error(`Get metadata failed: ${response.status} ${await response.text()}`);
}

const { data } = await response.json();
console.log('Title:', data.title);
console.log('Author:', data.author);
console.log('Keywords:', data.keywords);
import requests
import os
response = requests.post(
    'https://api.pdfapi.dev/v1/pdf/metadata/get',
    headers={
        'Authorization': f'Bearer {os.environ["PDFAPI_KEY"]}',
        'Content-Type': 'application/json',
    },
    json={
        'url': 'https://example.com/document.pdf',
    },
)

# Raise on 4xx/5xx so a failed lookup is reported rather than printing nothing.
response.raise_for_status()

# The metadata properties live under the top-level 'data' key.
metadata = response.json()['data']
print(f"Title: {metadata.get('title')}")
print(f"Author: {metadata.get('author')}")
print(f"Created: {metadata.get('creation_date')}")
Response - Get Metadata
Success (200)
{
"data": {
"title": "Q4 2025 Financial Report",
"author": "Finance Department",
"subject": "Quarterly financial summary and projections",
"keywords": "finance, quarterly, report, 2025, Q4",
"creator": "Internal Reporting System",
"producer": "PDF API",
"creation_date": "2025-12-15T10:30:00Z",
"modification_date": "2025-12-20T14:45:00Z",
"page_count": 24,
"pdf_version": "1.7"
}
}
Response - Set Metadata
Success (200)
Returns the PDF file with updated metadata:
Content-Type: application/pdf
Content-Disposition: attachment; filename="document.pdf"
Error Responses
Missing PDF Source (400)
{
"error": {
"code": "VALIDATION_ERROR",
"message": "Either 'pdf' or 'url' is required"
}
}
Invalid PDF (400)
{
"error": {
"code": "INVALID_PDF",
"message": "The provided content is not a valid PDF document"
}
}
Empty Metadata (400)
{
"error": {
"code": "VALIDATION_ERROR",
"message": "At least one metadata property is required"
}
}
See Error Codes for complete reference.
Use Cases
Document Cataloging
Add metadata to improve document organization and searchability:
/**
 * Set catalog metadata (title, author, subject, keywords) on a PDF through
 * the metadata endpoint.
 *
 * @param {string} pdfBase64 - Base64-encoded PDF content.
 * @param {{name: string, uploadedBy: string, category: string, tags?: string[]}} documentInfo
 *   Catalog record; `tags` is joined into a comma-separated keywords string
 *   and is treated as empty when absent.
 * @returns {Promise<ArrayBuffer>} The response body (the PDF with updated metadata).
 * @throws {Error} When the API responds with a non-2xx status.
 */
async function catalogDocument(pdfBase64, documentInfo) {
  const response = await fetch('https://api.pdfapi.dev/v1/pdf/metadata', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.PDFAPI_KEY}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      pdf: pdfBase64,
      metadata: {
        title: documentInfo.name,
        author: documentInfo.uploadedBy,
        subject: documentInfo.category,
        keywords: (documentInfo.tags ?? []).join(', '),
      },
    }),
  });

  // Error responses carry a JSON error body; returning it as an ArrayBuffer
  // would make callers treat it as PDF bytes.
  if (!response.ok) {
    throw new Error(`Set metadata failed: ${response.status} ${await response.text()}`);
  }

  return response.arrayBuffer();
}
Compliance and Auditing
Extract metadata for compliance verification:
def verify_document_metadata(pdf_url):
    """Fetch a PDF's metadata and report whether the required fields are set.

    Args:
        pdf_url: URL of the PDF; sent as the ``url`` field of the request body.

    Returns:
        A dict with ``compliant`` (True when no required field is missing),
        ``missing_fields`` (required field names that are absent or empty)
        and ``metadata`` (the ``data`` object from the response).

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status.
    """
    response = requests.post(
        'https://api.pdfapi.dev/v1/pdf/metadata/get',
        headers={'Authorization': f'Bearer {os.environ["PDFAPI_KEY"]}'},
        json={'url': pdf_url},
    )
    # Error bodies have no 'data' key; surface the HTTP failure rather than
    # letting the lookup below raise an unrelated KeyError.
    response.raise_for_status()
    metadata = response.json()['data']

    # Check required fields; an empty value counts as missing.
    required = ['title', 'author', 'creation_date']
    missing = [field for field in required if not metadata.get(field)]

    return {
        'compliant': len(missing) == 0,
        'missing_fields': missing,
        'metadata': metadata,
    }
Notes
- Setting metadata does not modify the PDF content, only the document properties
- Existing metadata is preserved unless explicitly overwritten
- The `creation_date` is automatically set when a PDF is first created
- The `modification_date` is updated when metadata is changed
- Maximum length for any metadata field is 1024 characters
- Keywords should be comma-separated for best compatibility