API Endpoints

POST Ingest Text Document

Ingest a text document with metadata.

from databridge import DataBridge

# Create client instance
db = DataBridge(uri="your-databridge-uri")

# Ingest text document
doc = db.ingest_text(
    content="Machine learning is transforming industries...",
    metadata={
        "title": "ML Overview",
        "category": "tech",
        "tags": ["ml", "ai"]
    } # optional - could be empty
)
print(f"Document ID: {doc.external_id}")

Response:

{
    "external_id": "doc_abc123",
    "content_type": "text/plain",
    "filename": null,
    "metadata": {
        "title": "ML Overview",
        "category": "tech",
        "tags": ["ml", "ai"]
    },
    "storage_info": {},
    "system_metadata": {
        "created_at": "2024-03-20T10:30:00Z",
        "updated_at": "2024-03-20T10:30:00Z",
        "version": 1
    },
    "access_control": {
        "readers": ["user_123"],
        "writers": ["user_123"],
        "admins": ["user_123"]
    },
    "chunk_ids": ["chunk_1", "chunk_2"]
}

POST Ingest File Document

Upload and ingest a file document.

from databridge import DataBridge

# Create client instance
db = DataBridge(uri="your-databridge-uri")

# Ingest file document
doc = db.ingest_file(
    file="presentation.pdf",
    filename="Q4_Presentation.pdf",
    content_type="application/pdf",
    metadata={
        "department": "Finance",
        "year": 2024,
        "quarter": 4
    }
)
print(f"Document ID: {doc.external_id}")
print(f"Storage location: {doc.storage_info['bucket']}/{doc.storage_info['key']}")

Response:

{
    "external_id": "doc_xyz789",
    "content_type": "application/pdf",
    "filename": "Q4_Presentation.pdf",
    "metadata": {
        "department": "Finance",
        "year": 2024,
        "quarter": 4
    },
    "storage_info": {
        "bucket": "your-bucket-name",
        "key": "doc_xyz789/Q4_Presentation.pdf"
    },
    "system_metadata": {
        "created_at": "2024-03-20T10:30:00Z",
        "updated_at": "2024-03-20T10:30:00Z",
        "version": 1
    },
    "access_control": {
        "readers": ["user_123"],
        "writers": ["user_123"],
        "admins": ["user_123"]
    },
    "chunk_ids": ["chunk_1", "chunk_2", "chunk_3"]
}

POST Generate AI Completion

Generate AI completions using relevant document context.

from databridge import DataBridge

db = DataBridge(uri="your-databridge-uri")

# Generate completion
response = db.query(
    query="What are the main applications of machine learning?",
    filters={"category": "tech"},
    k=3,
    max_tokens=150,
    temperature=0.7
)

print("Answer:", response.completion)
print(f"Total tokens used: {response.usage.total_tokens}")

Parameters:

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| query | string | Yes | - | The question or prompt |
| filters | object | No | null | Metadata filters to apply |
| k | integer | No | 4 | Number of context chunks to use |
| min_score | float | No | 0.0 | Minimum similarity score threshold |
| max_tokens | integer | No | null | Maximum tokens in completion |
| temperature | float | No | null | Sampling temperature for completion |

Response:

{
    "completion": "Based on the retrieved context, machine learning has several key applications...",
    "usage": {
        "completion_tokens": 45,
        "prompt_tokens": 120,
        "total_tokens": 165
    }
}

POST Search Document Chunks

Search for relevant document chunks using semantic similarity.

from databridge import DataBridge

db = DataBridge(uri="your-databridge-uri")

# Search for relevant chunks
chunks = db.retrieve_chunks(
    query="machine learning applications",
    filters={"category": "tech"},
    k=3,
    min_score=0.7
)

for chunk in chunks:
    print(f"\nMatch (score: {chunk.score:.2f}):")
    print(chunk.content)
    print(f"From document: {chunk.document_id}")

Parameters:

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| query | string | Yes | - | The search query text |
| filters | object | No | null | Metadata filters to apply |
| k | integer | No | 4 | Number of chunks to return |
| min_score | float | No | 0.0 | Minimum similarity score threshold |

Response:

[
    {
        "content": "Machine learning is transforming...",
        "score": 0.89,
        "document_id": "doc_abc123",
        "chunk_number": 0,
        "metadata": {
            "title": "ML Overview",
            "category": "tech"
        },
        "content_type": "text/plain",
        "filename": null,
        "download_url": null
    }
]

GET List Documents

List accessible documents with pagination and optional filtering.

from databridge import DataBridge

db = DataBridge(uri="your-databridge-uri")

# List documents with filters
docs = db.list_documents(
    skip=0,
    limit=10,
    filters={"category": "tech"}
)

for doc in docs:
    print(f"Document ID: {doc.external_id}")
    print(f"Title: {doc.metadata.get('title')}")
    print(f"Created: {doc.system_metadata['created_at']}")

Parameters:

  • skip (optional): Number of documents to skip (default: 0)

  • limit (optional): Maximum number of documents to return (default: 100)

  • filters (optional): JSON-encoded metadata filters

Response:

[
    {
        "external_id": "doc_abc123",
        "content_type": "text/plain",
        "filename": "example.txt",
        "metadata": {
            "title": "Document Title",
            "category": "tech"
        },
        "storage_info": {
            "bucket": "your-bucket-name",
            "key": "doc_abc123/example.txt"
        },
        "system_metadata": {
            "created_at": "2024-03-20T10:30:00Z",
            "updated_at": "2024-03-20T10:30:00Z",
            "version": 1
        },
        "access_control": {
            "readers": ["user_123"],
            "writers": ["user_123"],
            "admins": ["user_123"]
        },
        "chunk_ids": ["chunk_1", "chunk_2"]
    }
]

GET Document

Get metadata for a specific document.

from databridge import DataBridge

db = DataBridge(uri="your-databridge-uri")

# Get document by ID
doc = db.get_document("doc_abc123")
print(f"Title: {doc.metadata.get('title')}")
print(f"Created: {doc.system_metadata['created_at']}")
if doc.storage_info:
    print(f"Storage: {doc.storage_info['bucket']}/{doc.storage_info['key']}")

Response:

{
    "external_id": "doc_abc123",
    "content_type": "text/plain",
    "filename": "example.txt",
    "metadata": {
        "title": "Document Title",
        "category": "tech"
    },
    "storage_info": {
        "bucket": "your-bucket-name",
        "key": "doc_abc123/example.txt"
    },
    "system_metadata": {
        "created_at": "2024-03-20T10:30:00Z",
        "updated_at": "2024-03-20T10:30:00Z",
        "version": 1
    },
    "access_control": {
        "readers": ["user_123"],
        "writers": ["user_123"],
        "admins": ["user_123"]
    },
    "chunk_ids": ["chunk_1", "chunk_2"]
}

Last updated