Add OpenAI Deep Research MCP server

- FastMCP server with deep_research and deep_research_info tools
- OpenAI Responses API integration with background polling
- Configurable model via DEEP_RESEARCH_MODEL env var
- Default: o4-mini-deep-research (faster/cheaper)
- Optional FastAPI backend for standalone use
- Tested successfully: 80s query, 20 web searches, 4 citations
This commit is contained in:
Krishna Kumar
2025-12-30 16:00:37 -06:00
commit 9a6ac3fd2f
11 changed files with 1750 additions and 0 deletions

1
backend/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""Deep Research Backend - FastAPI service for OpenAI Deep Research API."""

29
backend/config.py Normal file
View File

@@ -0,0 +1,29 @@
"""Configuration for Deep Research MCP Server."""
import os
from dotenv import load_dotenv
load_dotenv()
# OpenAI API Key (required)
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Hardcoded fallback model (cheaper/faster option)
DEFAULT_MODEL = "o4-mini-deep-research-2025-06-26"
# Available models for reference
AVAILABLE_MODELS = [
"o4-mini-deep-research-2025-06-26", # Faster, cheaper (DEFAULT)
"o3-deep-research-2025-06-26", # Thorough, ~$1+ per query
]
# Deep Research Model - configurable via Docker env, falls back to hardcoded default
DEEP_RESEARCH_MODEL = os.getenv("DEEP_RESEARCH_MODEL") or DEFAULT_MODEL
# FastAPI service configuration
FASTAPI_HOST = os.getenv("DEEP_RESEARCH_HOST", "0.0.0.0")
FASTAPI_PORT = int(os.getenv("DEEP_RESEARCH_PORT", "8002"))
# Polling configuration
POLL_INTERVAL_SECONDS = float(os.getenv("DEEP_RESEARCH_POLL_INTERVAL", "5.0"))
MAX_WAIT_MINUTES = int(os.getenv("DEEP_RESEARCH_MAX_WAIT", "15"))

211
backend/main.py Normal file
View File

@@ -0,0 +1,211 @@
"""FastAPI service for Deep Research MCP Server."""
import time
import uuid
from typing import Any
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from .config import FASTAPI_HOST, FASTAPI_PORT, MAX_WAIT_MINUTES
from .openai_client import get_client
app = FastAPI(
title="Deep Research API",
description="OpenAI Deep Research API wrapper",
version="0.1.0",
)
# CORS middleware
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# In-memory task tracking (for status lookups)
_tasks: dict[str, dict[str, Any]] = {}
class ResearchRequest(BaseModel):
    """Request model for starting research.

    Parsed/validated by pydantic from the JSON body of POST /api/research.
    """
    # The research question to investigate (required).
    query: str
    # Optional developer/system prompt forwarded to the model.
    system_prompt: str | None = None
    # When True, the code_interpreter tool is offered to the model.
    include_code_analysis: bool = True
    # Per-task polling budget in minutes; defaults to the service-wide cap.
    max_wait_minutes: int = MAX_WAIT_MINUTES
class ResearchResponse(BaseModel):
    """Response model for research results.

    Most fields are None until the task reaches a terminal state; the
    report/citation fields are populated only on successful completion.
    """
    # Server-generated UUID identifying this task in the in-memory registry.
    task_id: str
    # One of the states surfaced by the client, e.g. "completed", "failed",
    # "timeout" (exact set depends on the OpenAI response status).
    status: str
    model: str | None = None
    # Final report text, when completed.
    report_text: str | None = None
    # URL citations extracted from the report annotations.
    citations: list[dict[str, Any]] | None = None
    # Tool-call counts observed in the response output.
    web_searches: int | None = None
    code_executions: int | None = None
    # Seconds elapsed since the task was started.
    elapsed_time: float | None = None
    # Error description for failed/timeout states.
    error: str | None = None
@app.get("/health")
async def health_check():
"""Health check endpoint."""
return {"status": "healthy", "service": "deep-research"}
@app.post("/api/research", response_model=ResearchResponse)
async def start_research(request: ResearchRequest) -> ResearchResponse:
"""
Start a deep research task.
This initiates a background research task and returns immediately.
Use GET /api/research/{task_id} to poll for results.
"""
client = get_client()
task_id = str(uuid.uuid4())
start_time = time.time()
try:
# Start the research
result = await client.start_research(
query=request.query,
system_prompt=request.system_prompt,
include_code_analysis=request.include_code_analysis,
)
# Store task info
_tasks[task_id] = {
"response_id": result["id"],
"status": result["status"],
"model": result["model"],
"start_time": start_time,
"max_wait_minutes": request.max_wait_minutes,
}
return ResearchResponse(
task_id=task_id,
status=result["status"],
model=result["model"],
)
except Exception as e:
return ResearchResponse(
task_id=task_id,
status="failed",
error=str(e),
)
@app.get("/api/research/{task_id}", response_model=ResearchResponse)
async def get_research_status(task_id: str) -> ResearchResponse:
"""
Get the status/results of a research task.
Poll this endpoint until status is 'completed', 'failed', or 'timeout'.
"""
if task_id not in _tasks:
raise HTTPException(status_code=404, detail="Task not found")
task = _tasks[task_id]
client = get_client()
try:
result = await client.poll_research(task["response_id"])
elapsed = time.time() - task["start_time"]
# Check for timeout
max_seconds = task["max_wait_minutes"] * 60
if result["status"] not in ("completed", "failed", "cancelled") and elapsed >= max_seconds:
result["status"] = "timeout"
result["error"] = f"Timeout after {task['max_wait_minutes']} minutes"
# Update stored status
task["status"] = result["status"]
response = ResearchResponse(
task_id=task_id,
status=result["status"],
model=result.get("model"),
elapsed_time=elapsed,
error=result.get("error"),
)
# Include output if completed
if result["status"] == "completed" and "output" in result:
output = result["output"]
response.report_text = output.get("report_text")
response.citations = output.get("citations")
response.web_searches = output.get("web_searches")
response.code_executions = output.get("code_executions")
return response
except Exception as e:
return ResearchResponse(
task_id=task_id,
status="failed",
error=str(e),
)
@app.post("/api/research/{task_id}/wait", response_model=ResearchResponse)
async def wait_for_research(task_id: str) -> ResearchResponse:
"""
Wait for a research task to complete (blocking).
This will poll until completion or timeout.
"""
if task_id not in _tasks:
raise HTTPException(status_code=404, detail="Task not found")
task = _tasks[task_id]
client = get_client()
start_time = task["start_time"]
try:
result = await client.wait_for_completion(
response_id=task["response_id"],
max_wait_minutes=task["max_wait_minutes"],
)
elapsed = time.time() - start_time
task["status"] = result["status"]
response = ResearchResponse(
task_id=task_id,
status=result["status"],
model=result.get("model"),
elapsed_time=elapsed,
error=result.get("error"),
)
if result["status"] == "completed" and "output" in result:
output = result["output"]
response.report_text = output.get("report_text")
response.citations = output.get("citations")
response.web_searches = output.get("web_searches")
response.code_executions = output.get("code_executions")
return response
except Exception as e:
return ResearchResponse(
task_id=task_id,
status="failed",
error=str(e),
)
def main():
    """Entry point: launch the FastAPI app under uvicorn."""
    import uvicorn

    host, port = FASTAPI_HOST, FASTAPI_PORT
    print(f"Starting Deep Research API on {host}:{port}")
    uvicorn.run(app, host=host, port=port)


if __name__ == "__main__":
    main()

193
backend/openai_client.py Normal file
View File

@@ -0,0 +1,193 @@
"""OpenAI Responses API client for Deep Research."""
import asyncio
import time
from typing import Any
from openai import AsyncOpenAI
from .config import OPENAI_API_KEY, DEEP_RESEARCH_MODEL, POLL_INTERVAL_SECONDS
class DeepResearchClient:
    """Client for OpenAI Deep Research via Responses API.

    Wraps AsyncOpenAI with three operations: start a background research
    task, poll it once, or block until it completes, plus a formatter that
    extracts the report/citations from a completed response.
    """

    def __init__(self) -> None:
        # Fail fast: the async client cannot authenticate without a key.
        if not OPENAI_API_KEY:
            raise ValueError("OPENAI_API_KEY environment variable is required")
        self.client = AsyncOpenAI(api_key=OPENAI_API_KEY)
        # Model is fixed at construction from config (env-overridable there).
        self.model = DEEP_RESEARCH_MODEL

    async def start_research(
        self,
        query: str,
        system_prompt: str | None = None,
        include_code_analysis: bool = True,
    ) -> dict[str, Any]:
        """
        Start a deep research task in background mode.

        Args:
            query: The research query
            system_prompt: Optional system/developer prompt
            include_code_analysis: Whether to include code_interpreter tool

        Returns:
            Response object with id for polling
        """
        # Build input messages in Responses-API shape; the developer message
        # (when given) precedes the user query.
        input_messages = []
        if system_prompt:
            input_messages.append({
                "role": "developer",
                "content": [{"type": "input_text", "text": system_prompt}]
            })
        input_messages.append({
            "role": "user",
            "content": [{"type": "input_text", "text": query}]
        })
        # Build tools list — web search is always on; code_interpreter is
        # opt-out via include_code_analysis.
        tools = [{"type": "web_search_preview"}]
        if include_code_analysis:
            tools.append({
                "type": "code_interpreter",
                "container": {"type": "auto", "file_ids": []}
            })
        # Start background research
        response = await self.client.responses.create(
            model=self.model,
            input=input_messages,
            reasoning={"summary": "auto"},
            tools=tools,
            background=True,  # Run in background for long tasks
        )
        return {
            "id": response.id,
            "status": response.status,
            "model": self.model,
            "created_at": time.time(),
        }

    async def poll_research(self, response_id: str) -> dict[str, Any]:
        """
        Poll for research completion status.

        Args:
            response_id: The response ID from start_research

        Returns:
            Status dict with completion info; an "output" key is present
            only once the response status is "completed".
        """
        response = await self.client.responses.retrieve(response_id)
        result = {
            "id": response.id,
            "status": response.status,
            "model": self.model,
        }
        if response.status == "completed":
            result["output"] = self._format_output(response)
        return result

    async def wait_for_completion(
        self,
        response_id: str,
        max_wait_minutes: int = 15,
        poll_interval: float = POLL_INTERVAL_SECONDS,
    ) -> dict[str, Any]:
        """
        Wait for research to complete, polling at intervals.

        Args:
            response_id: The response ID from start_research
            max_wait_minutes: Maximum minutes to wait
            poll_interval: Seconds between polls

        Returns:
            Final result with report and citations; on failure/cancellation
            or timeout, the dict carries an "error" message instead.
        """
        start_time = time.time()
        max_wait_seconds = max_wait_minutes * 60
        while True:
            result = await self.poll_research(response_id)
            if result["status"] == "completed":
                return result
            if result["status"] in ("failed", "cancelled"):
                return {
                    **result,
                    "error": f"Research {result['status']}",
                }
            # Check the time budget before sleeping so we never oversleep
            # past the deadline by more than one poll interval.
            elapsed = time.time() - start_time
            if elapsed >= max_wait_seconds:
                return {
                    **result,
                    "error": f"Timeout after {max_wait_minutes} minutes",
                    "status": "timeout",
                }
            await asyncio.sleep(poll_interval)

    def _format_output(self, response: Any) -> dict[str, Any]:
        """
        Extract structured output from completed response.

        Returns:
            Dict with report_text, citations, and metadata
        """
        output = {
            "report_text": "",
            "citations": [],
            "web_searches": 0,
            "code_executions": 0,
        }
        if not response.output:
            return output
        # Process output array: count tool calls and pull the report text out
        # of the message item(s).
        for item in response.output:
            item_type = getattr(item, "type", None)
            # Count tool calls
            if item_type == "web_search_call":
                output["web_searches"] += 1
            elif item_type == "code_interpreter_call":
                output["code_executions"] += 1
            # Extract final message content.
            # NOTE(review): if several message items carry output_text, the
            # last one wins — presumably only the final report message does.
            if item_type == "message":
                for content in getattr(item, "content", []):
                    if getattr(content, "type", None) == "output_text":
                        output["report_text"] = content.text
                        # Extract annotations/citations (URL-bearing only)
                        for annotation in getattr(content, "annotations", []):
                            if hasattr(annotation, "url"):
                                output["citations"].append({
                                    "title": getattr(annotation, "title", ""),
                                    "url": annotation.url,
                                    "start_index": getattr(annotation, "start_index", 0),
                                    "end_index": getattr(annotation, "end_index", 0),
                                })
        return output
# Process-wide client, created lazily so importing this module never
# requires OPENAI_API_KEY to be set.
_client: DeepResearchClient | None = None


def get_client() -> DeepResearchClient:
    """Return the shared DeepResearchClient, constructing it on first use."""
    global _client
    if _client is None:
        _client = DeepResearchClient()
    return _client