API Endpoints
POST
Ingest Text Document
POST
Ingest Text Document — Ingest a text document with metadata.
from databridge import DataBridge
# Create client instance
db = DataBridge(uri="your-databridge-uri")
# Ingest text document
doc = db.ingest_text(
content="Machine learning is transforming industries...",
metadata={
"title": "ML Overview",
"category": "tech",
"tags": ["ml", "ai"]
} # optional - could be empty
)
print(f"Document ID: {doc.external_id}")
curl -X POST "http://localhost:8000/ingest/text" \
-H "Authorization: Bearer your_token" \
-H "Content-Type: application/json" \
-d '{
"content": "Machine learning is transforming industries...",
"metadata": {
"title": "ML Overview",
"category": "tech",
"tags": ["ml", "ai"]
}
}'
Response:
{
"external_id": "doc_abc123",
"content_type": "text/plain",
"filename": null,
"metadata": {
"title": "ML Overview",
"category": "tech",
"tags": ["ml", "ai"]
},
"storage_info": {},
"system_metadata": {
"created_at": "2024-03-20T10:30:00Z",
"updated_at": "2024-03-20T10:30:00Z",
"version": 1
},
"access_control": {
"readers": ["user_123"],
"writers": ["user_123"],
"admins": ["user_123"]
},
"chunk_ids": ["chunk_1", "chunk_2"]
}
{
"detail": "Invalid authentication credentials"
}
{
"detail": [
{
"loc": ["body", "content"],
"msg": "field required",
"type": "value_error.missing"
}
]
}
POST
Ingest File Document
POST
Ingest File Document — Upload and ingest a file document.
from databridge import DataBridge
# Create client instance
db = DataBridge(uri="your-databridge-uri")
# Ingest file document
doc = db.ingest_file(
file="presentation.pdf",
filename="Q4_Presentation.pdf",
content_type="application/pdf",
metadata={
"department": "Finance",
"year": 2024,
"quarter": 4
}
)
print(f"Document ID: {doc.external_id}")
print(f"Storage location: {doc.storage_info['bucket']}/{doc.storage_info['key']}")
curl -X POST "http://localhost:8000/ingest/file" \
-H "Authorization: Bearer your_token" \
-F "file=@presentation.pdf" \
-F 'metadata={"department":"Finance","year":2024,"quarter":4}'
Response:
{
"external_id": "doc_xyz789",
"content_type": "application/pdf",
"filename": "Q4_Presentation.pdf",
"metadata": {
"department": "Finance",
"year": 2024,
"quarter": 4
},
"storage_info": {
"bucket": "your-bucket-name",
"key": "doc_xyz789/Q4_Presentation.pdf"
},
"system_metadata": {
"created_at": "2024-03-20T10:30:00Z",
"updated_at": "2024-03-20T10:30:00Z",
"version": 1
},
"access_control": {
"readers": ["user_123"],
"writers": ["user_123"],
"admins": ["user_123"]
},
"chunk_ids": ["chunk_1", "chunk_2", "chunk_3"]
}
{
"detail": "Invalid authentication credentials"
}
{
"detail": "File size exceeds maximum allowed size of 100MB"
}
POST
Generate AI Completion
POST
Generate AI Completion — Generate AI completions using relevant document context.
from databridge import DataBridge
db = DataBridge(uri="your-databridge-uri")
# Generate completion
response = db.query(
query="What are the main applications of machine learning?",
filters={"category": "tech"},
k=3,
max_tokens=150,
temperature=0.7
)
print("Answer:", response.completion)
print(f"Total tokens used: {response.usage.total_tokens}")
curl -X POST "http://localhost:8000/query" \
-H "Authorization: Bearer your_token" \
-H "Content-Type: application/json" \
-d '{
"query": "What are the main applications of machine learning?",
"filters": {
"category": "tech"
},
"k": 3,
"max_tokens": 150,
"temperature": 0.7
}'
Parameters:
query
string
Yes
-
The question or prompt
filters
object
No
null
Metadata filters to apply
k
integer
No
4
Number of context chunks to use
min_score
float
No
0.0
Minimum similarity score threshold
max_tokens
integer
No
null
Maximum tokens in completion
temperature
float
No
null
Sampling temperature for completion
Response:
{
"completion": "Based on the retrieved context, machine learning has several key applications...",
"usage": {
"completion_tokens": 45,
"prompt_tokens": 120,
"total_tokens": 165
}
}
{
"detail": "Invalid authentication credentials"
}
{
"detail": [
{
"loc": ["body", "query"],
"msg": "field required",
"type": "value_error.missing"
}
]
}
POST
Search Document Chunks
POST
Search Document Chunks — Search for relevant document chunks using semantic similarity.
from databridge import DataBridge
db = DataBridge(uri="your-databridge-uri")
# Search for relevant chunks
chunks = db.retrieve_chunks(
query="machine learning applications",
filters={"category": "tech"},
k=3,
min_score=0.7
)
for chunk in chunks:
print(f"\nMatch (score: {chunk.score:.2f}):")
print(chunk.content)
print(f"From document: {chunk.document_id}")
curl -X POST "http://localhost:8000/retrieve/chunks" \
-H "Authorization: Bearer your_token" \
-H "Content-Type: application/json" \
-d '{
"query": "machine learning applications",
"filters": {
"category": "tech"
},
"k": 3,
"min_score": 0.7
}'
Parameters:
query
string
Yes
-
The search query text
filters
object
No
null
Metadata filters to apply
k
integer
No
4
Number of chunks to return
min_score
float
No
0.0
Minimum similarity score threshold
Response:
[
{
"content": "Machine learning is transforming...",
"score": 0.89,
"document_id": "doc_abc123",
"chunk_number": 0,
"metadata": {
"title": "ML Overview",
"category": "tech"
},
"content_type": "text/plain",
"filename": null,
"download_url": null
}
]
{
"detail": "Invalid authentication credentials"
}
{
"detail": [
{
"loc": ["body", "query"],
"msg": "field required",
"type": "value_error.missing"
}
]
}
GET
List Documents
GET
List Documents — List accessible documents with pagination and optional filtering.
from databridge import DataBridge
db = DataBridge(uri="your-databridge-uri")
# List documents with filters
docs = db.list_documents(
skip=0,
limit=10,
filters={"category": "tech"}
)
for doc in docs:
print(f"Document ID: {doc.external_id}")
print(f"Title: {doc.metadata.get('title')}")
print(f"Created: {doc.system_metadata['created_at']}")
curl "http://localhost:8000/documents?skip=0&limit=10&filters={\"category\":\"tech\"}" \
-H "Authorization: Bearer your_token"
Parameters:
skip (optional): Number of documents to skip (default: 0)
limit (optional): Maximum documents to return (default: 100)
filters (optional): JSON-encoded metadata filters
Response:
[
{
"external_id": "doc_abc123",
"content_type": "text/plain",
"filename": "example.txt",
"metadata": {
"title": "Document Title",
"category": "tech"
},
"storage_info": {
"bucket": "your-bucket-name",
"key": "doc_abc123/example.txt"
},
"system_metadata": {
"created_at": "2024-03-20T10:30:00Z",
"updated_at": "2024-03-20T10:30:00Z",
"version": 1
},
"access_control": {
"readers": ["user_123"],
"writers": ["user_123"],
"admins": ["user_123"]
},
"chunk_ids": ["chunk_1", "chunk_2"]
}
]
{
"detail": "Invalid authentication credentials"
}
{
"detail": [
{
"loc": ["query", "limit"],
"msg": "ensure this value is less than or equal to 100",
"type": "value_error.number.not_le"
}
]
}
GET
Document
GET
Document — Get metadata for a specific document.
from databridge import DataBridge
db = DataBridge(uri="your-databridge-uri")
# Get document by ID
doc = db.get_document("doc_abc123")
print(f"Title: {doc.metadata.get('title')}")
print(f"Created: {doc.system_metadata['created_at']}")
if doc.storage_info:
print(f"Storage: {doc.storage_info['bucket']}/{doc.storage_info['key']}")
curl "http://localhost:8000/documents/doc_abc123" \
-H "Authorization: Bearer your_token"
Response:
{
"external_id": "doc_abc123",
"content_type": "text/plain",
"filename": "example.txt",
"metadata": {
"title": "Document Title",
"category": "tech"
},
"storage_info": {
"bucket": "your-bucket-name",
"key": "doc_abc123/example.txt"
},
"system_metadata": {
"created_at": "2024-03-20T10:30:00Z",
"updated_at": "2024-03-20T10:30:00Z",
"version": 1
},
"access_control": {
"readers": ["user_123"],
"writers": ["user_123"],
"admins": ["user_123"]
},
"chunk_ids": ["chunk_1", "chunk_2"]
}
{
"detail": "Invalid authentication credentials"
}
{
"detail": "Document not found"
}
Last updated