# Provenance: recovered from git patch 3c90ec79b1189c098264e1aed8754777b87d2ade
# ("Add web/core/views_tts.py", Joshua Laymon, Thu, 21 Aug 2025 01:09:47 +0000),
# which creates web/core/views_tts.py as a new 87-line file.

# core/views_tts.py
import hashlib
import os
from django.conf import settings
from django.contrib.auth.decorators import login_required, user_passes_test
from django.http import FileResponse, HttpResponseBadRequest, HttpResponseForbidden, Http404
from django.utils.encoding import force_str

from .models import Entry

# OpenAI client
from openai import OpenAI

MODEL_NAME = "gpt-4o-mini-tts"  # good quality, lower cost
VOICE_NAME = "alloy"  # easy default
AUDIO_FMT = "mp3"


def _ensure_punctuated(s: str) -> str:
    """Return *s* stripped, with a period appended if it lacks end punctuation.

    Falsy input (``None``, ``""``) and whitespace-only input yield ``""``.
    Text that already ends in ``.``, ``!``, ``?``, ``…`` or ``—`` is returned
    stripped but otherwise unchanged.
    """
    text = s.strip() if s else ""
    if text == "":
        return text

    terminators = (".", "!", "?", "…", "—")
    if text.endswith(terminators):
        return text
    return f"{text}."
def _combine_text(entry: Entry) -> str:
    """Build the text to be spoken for *entry*.

    The entry's ``illustration`` is given terminal punctuation via
    ``_ensure_punctuated`` and followed by the stripped ``application``.
    If only one of the two is non-empty, that one is returned alone; if both
    are empty, the result is ``""``.
    """
    ill = _ensure_punctuated(force_str(entry.illustration or ""))
    app = force_str(entry.application or "").strip()
    if ill and app:
        # NOTE(review): the original comment here said "two spaces", but only
        # a single space is visible in this whitespace-collapsed source.
        # Confirm the intended separator; changing it also changes cache keys.
        return ill + " " + app
    return ill or app


def _tts_cache_dir() -> str:
    """Return the TTS cache directory under ``MEDIA_ROOT``, creating it if needed.

    Raises:
        RuntimeError: if ``settings.MEDIA_ROOT`` is unset or empty.
    """
    media_root = getattr(settings, "MEDIA_ROOT", None)
    if not media_root:
        raise RuntimeError("MEDIA_ROOT must be set to use TTS caching")
    path = os.path.join(media_root, "tts_cache")
    os.makedirs(path, exist_ok=True)
    return path


def _extract_audio_bytes(audio):
    """Return the raw payload from a TTS response object.

    Prefers a callable ``.read()``, then a ``.content`` attribute, and finally
    falls back to the object itself (SDK versions vary in what they return).
    """
    reader = getattr(audio, "read", None)
    if callable(reader):
        return reader()
    return getattr(audio, "content", audio)


def _write_cache_file(fpath: str, data) -> None:
    """Write *data* to *fpath* so that readers never see a partial file.

    The bytes go to a uniquely named sibling temp file first and are then
    moved into place with ``os.replace``. If anything fails, the temp file is
    removed and *fpath* is left untouched, so a failed write can never be
    mistaken for a valid cache entry later.
    """
    tmp_path = f"{fpath}.{os.getpid()}.{os.urandom(4).hex()}.tmp"
    try:
        with open(tmp_path, "wb") as tmp:
            tmp.write(data)
        os.replace(tmp_path, fpath)
    except BaseException:
        # Best-effort cleanup of the partial temp file; the original error is
        # what matters to the caller, so a failed unlink is ignored.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise


@login_required
@user_passes_test(lambda u: u.is_staff)
def api_tts_for_entry(request, entry_id: int):
    """Serve MP3 speech audio for an entry, generating and caching it on demand.

    Access is gated by the ``login_required`` and staff-only
    ``user_passes_test`` decorators. The audio is cached on disk under a name
    derived from the entry id and a hash of the model, voice and text, so a
    change to any of those produces a new cache file.

    Returns:
        ``FileResponse`` with ``audio/mpeg`` content on success, or
        ``HttpResponseBadRequest`` when the method is not GET, the entry has
        no text, or ``OPENAI_API_KEY`` is not configured.

    Raises:
        Http404: if no ``Entry`` with ``entry_id`` exists.
        RuntimeError: if ``MEDIA_ROOT`` is unset, or the TTS response carries
            no audio bytes.
    """
    if request.method != "GET":
        return HttpResponseBadRequest("GET required")

    try:
        entry = Entry.objects.get(pk=entry_id)
    except Entry.DoesNotExist:
        raise Http404("Entry not found") from None

    text = _combine_text(entry)
    if not text:
        return HttpResponseBadRequest("No text available for this entry")

    # Cache key = entry id + hash of text + model/voice
    h = hashlib.sha256(
        f"{entry_id}|{MODEL_NAME}|{VOICE_NAME}|{text}".encode("utf-8")
    ).hexdigest()[:16]
    fname = f"entry{entry_id}_{h}.{AUDIO_FMT}"
    fpath = os.path.join(_tts_cache_dir(), fname)

    # Serve cached if present. Opening directly (instead of exists-then-open)
    # avoids a race with the file disappearing between the two calls.
    try:
        return FileResponse(open(fpath, "rb"), content_type="audio/mpeg")
    except FileNotFoundError:
        pass

    # Generate via OpenAI and cache
    api_key = getattr(settings, "OPENAI_API_KEY", None)
    if not api_key:
        return HttpResponseBadRequest("OpenAI key not configured")

    client = OpenAI(api_key=api_key)
    # https://platform.openai.com/docs/guides/tts
    # The SDK's keyword for the output encoding is `response_format`;
    # passing `format=` is rejected with a TypeError.
    audio = client.audio.speech.create(
        model=MODEL_NAME,
        voice=VOICE_NAME,
        input=text,
        response_format=AUDIO_FMT,
    )

    audio_bytes = _extract_audio_bytes(audio)
    if not isinstance(audio_bytes, (bytes, bytearray, memoryview)) or not audio_bytes:
        # Refuse to cache something that is not audio: an empty or invalid
        # file would otherwise be served as a cache hit on every later request.
        raise RuntimeError("TTS response contained no audio data")

    _write_cache_file(fpath, audio_bytes)

    return FileResponse(open(fpath, "rb"), content_type="audio/mpeg")