Illustrations/web/core/views_tts.py

87 lines
2.8 KiB
Python

# core/views_tts.py
import hashlib
import os
from django.conf import settings
from django.contrib.auth.decorators import login_required, user_passes_test
from django.http import FileResponse, HttpResponseBadRequest, HttpResponseForbidden, Http404
from django.utils.encoding import force_str
from .models import Entry
# OpenAI client
from openai import OpenAI
# Speech model requested from the OpenAI API (picked for good quality at lower
# cost). It is part of the cache key, so changing it regenerates audio.
MODEL_NAME = "gpt-4o-mini-tts"
# Default voice; also part of the cache key.
VOICE_NAME = "alloy"
# Audio format: used as the cache file extension and sent to the speech API.
# The view serves cached files as "audio/mpeg", so that content type must be
# revisited if this value ever changes.
AUDIO_FMT = "mp3"
def _ensure_punctuated(s: str) -> str:
    """Return *s* stripped, ending in sentence punctuation.

    A period is appended when the text does not already end with ".", "!",
    "?" or an ellipsis. Closing quotes and brackets after the punctuation are
    ignored for that check, so 'He said "stop."' is returned unchanged.

    Args:
        s: Text to normalize; ``None`` and whitespace-only input are accepted.

    Returns:
        The stripped text with terminal punctuation, or "" for empty input.
    """
    # Non-ASCII characters are spelled as escapes so an encoding round-trip
    # cannot silently turn them into "": str.endswith("") is always true and
    # would stop a period from ever being appended.
    terminators = (".", "!", "?", "\u2026")
    closers = "\"')]\u201d\u2019"

    s = (s or "").strip()
    if not s:
        return ""
    # Look past trailing closing quotes/brackets to find the real last mark.
    core = s.rstrip(closers)
    return s if core.endswith(terminators) else s + "."
def _combine_text(entry: Entry) -> str:
    """Build the text to be spoken for *entry*.

    The illustration (normalized to end with punctuation) comes first,
    followed by the stripped application text. Empty parts are skipped, so
    the result is "" when the entry has neither.

    NOTE(review): the parts are joined with a single space, although an
    earlier inline comment said "two spaces" -- confirm which is intended.
    The combined text feeds the cache key, so changing the separator would
    invalidate existing cached audio.
    """
    illustration = _ensure_punctuated(force_str(entry.illustration or ""))
    application = force_str(entry.application or "").strip()
    # Keep only the non-empty parts, preserving illustration-then-application.
    present = [part for part in (illustration, application) if part]
    return " ".join(present)
def _tts_cache_dir():
    """Return the TTS cache directory under MEDIA_ROOT, creating it if needed.

    Raises:
        RuntimeError: if ``settings.MEDIA_ROOT`` is missing or empty.
    """
    root = getattr(settings, "MEDIA_ROOT", None)
    if root:
        # Keep generated audio in its own self-contained folder under media.
        cache_dir = os.path.join(root, "tts_cache")
        os.makedirs(cache_dir, exist_ok=True)
        return cache_dir
    raise RuntimeError("MEDIA_ROOT must be set to use TTS caching")
def _write_cache_file(path, data):
    """Write *data* to *path* atomically so readers never see a partial file."""
    # The temp file lives next to the target so os.replace stays on one
    # filesystem; the random suffix keeps concurrent writers from colliding.
    tmp_path = f"{path}.{os.urandom(8).hex()}.tmp"
    try:
        with open(tmp_path, "wb") as tmp:
            tmp.write(data)
        os.replace(tmp_path, path)
    finally:
        # After a successful replace the temp name is gone; otherwise clean up.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


@login_required
@user_passes_test(lambda u: u.is_staff)
def api_tts_for_entry(request, entry_id: int):
    """Return spoken audio (MP3) for an entry, generating and caching it on demand.

    Only logged-in staff users pass the decorators. The audio is cached on
    disk under a name derived from the entry id, model, voice and text, so
    editing the entry's text produces a new cache file.

    Args:
        request: The incoming HTTP request; only GET is accepted.
        entry_id: Primary key of the ``Entry`` to read aloud.

    Returns:
        A ``FileResponse`` with content type "audio/mpeg", or a 400 response
        when the method is not GET, the entry has no text, or no OpenAI key
        is configured.

    Raises:
        Http404: if no entry with ``entry_id`` exists.
        RuntimeError: if the speech service returns no audio data.
    """
    if request.method != "GET":
        return HttpResponseBadRequest("GET required")

    try:
        entry = Entry.objects.get(pk=entry_id)
    except Entry.DoesNotExist:
        raise Http404("Entry not found") from None

    text = _combine_text(entry)
    if not text:
        return HttpResponseBadRequest("No text available for this entry")

    # Cache key = entry id + model/voice + text, shortened to 16 hex chars.
    key_material = f"{entry_id}|{MODEL_NAME}|{VOICE_NAME}|{text}"
    digest = hashlib.sha256(key_material.encode("utf-8")).hexdigest()[:16]
    fpath = os.path.join(_tts_cache_dir(), f"entry{entry_id}_{digest}.{AUDIO_FMT}")

    # Serve the cached file if present. Opening directly (instead of checking
    # os.path.exists first) avoids a race with a concurrent delete.
    try:
        cached = open(fpath, "rb")
    except FileNotFoundError:
        cached = None
    if cached is not None:
        # FileResponse takes ownership of the handle and closes it.
        return FileResponse(cached, content_type="audio/mpeg")

    # Cache miss: generate via OpenAI.
    api_key = getattr(settings, "OPENAI_API_KEY", None)
    if not api_key:
        return HttpResponseBadRequest("OpenAI key not configured")

    client = OpenAI(api_key=api_key)
    # The SDK keyword for the output format is `response_format`; passing
    # `format=` raises TypeError before any request is made.
    speech = client.audio.speech.create(
        model=MODEL_NAME,
        voice=VOICE_NAME,
        input=text,
        response_format=AUDIO_FMT,
    )

    # SDK versions vary: the result may expose .read(), .content, or already
    # be raw bytes. Handle all three.
    reader = getattr(speech, "read", None)
    audio_bytes = reader() if callable(reader) else getattr(speech, "content", speech)
    # Never cache an empty or non-bytes payload: it would be served forever.
    if not isinstance(audio_bytes, (bytes, bytearray)) or not audio_bytes:
        raise RuntimeError("TTS service returned no audio data")

    _write_cache_file(fpath, audio_bytes)
    return FileResponse(open(fpath, "rb"), content_type="audio/mpeg")