Update web/core/views_tts.py

2025-08-21 01:36:10 +00:00 · 2025-08-21 01:36:10 +00:00 · 02153083b6
commit 02153083b6
parent 72854a1d20
1 changed file with 43 additions and 75 deletions
--- a/web/core/views_tts.py
+++ b/web/core/views_tts.py
@@ -1,87 +1,55 @@
 # core/views_tts.py
-import hashlib
-import os
-from django.conf import settings
-from django.contrib.auth.decorators import login_required, user_passes_test
-from django.http import FileResponse, HttpResponseBadRequest, HttpResponseForbidden, Http404
-from django.utils.encoding import force_str

+from django.contrib.auth.decorators import login_required, user_passes_test
+from django.http import HttpResponse, HttpResponseBadRequest, HttpResponseForbidden
+from django.shortcuts import get_object_or_404
+from django.views.decorators.http import require_GET
 from .models import Entry

-# OpenAI client
+# If you use the OpenAI SDK:
 from openai import OpenAI
+import os

-MODEL_NAME = "gpt-4o-mini-tts"   # good quality, lower cost
-VOICE_NAME = "alloy"             # easy default
-AUDIO_FMT  = "mp3"
+client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", ""))

-def _ensure_punctuated(s: str) -> str:
-    s = (s or "").strip()
-    if not s:
-        return ""
-    return s if s.endswith((".", "!", "?", "…", "—")) else s + "."
-
-def _combine_text(entry: Entry) -> str:
-    ill = _ensure_punctuated(force_str(entry.illustration or ""))
-    app = force_str(entry.application or "").strip()
-    if ill and app:
-        return ill + "  " + app  # two spaces
-    return ill or app or ""
-
-def _tts_cache_dir():
-    # Put cache under media/tts_cache (self-contained folder)
-    media_root = getattr(settings, "MEDIA_ROOT", None)
-    if not media_root:
-        raise RuntimeError("MEDIA_ROOT must be set to use TTS caching")
-    path = os.path.join(media_root, "tts_cache")
-    os.makedirs(path, exist_ok=True)
-    return path
+def _is_staff(user):
+    return user.is_authenticated and (user.is_staff or user.is_superuser)

 @login_required
-@user_passes_test(lambda u: u.is_staff)
-def api_tts_for_entry(request, entry_id: int):
-    if request.method != "GET":
-        return HttpResponseBadRequest("GET required")
+@user_passes_test(_is_staff)
+@require_GET
+def api_tts_for_entry(request, entry_id):
+    """
+    Generate MP3 speech for an entry (staff‑only).
+    """
+    entry = get_object_or_404(Entry, pk=entry_id)

+    # ---- Build safe combined text (avoid TypeError) ----
+    ill = (entry.illustration or "").strip()
+    app = (entry.application or "").strip()
+    if ill and not any(ill.endswith(p) for p in ".!?…"):
+        ill = ill + "."
+    combined = "  ".join([t for t in (ill, app) if t]).strip()
+    if not combined:
+        return HttpResponseBadRequest("No text available for this entry.")
+
+    # ---- Call OpenAI TTS (MP3) ----
     try:
-        entry = Entry.objects.get(pk=entry_id)
-    except Entry.DoesNotExist:
-        raise Http404("Entry not found")
+        # gpt-4o-mini-tts → mp3 bytes
+        # voices: "alloy", "verse", "aria", etc.
+        speech = client.audio.speech.create(
+            model="gpt-4o-mini-tts",
+            voice="alloy",
+            input=combined,
+            format="mp3",
+        )
+        audio_bytes = speech.read()  # returns raw bytes
+    except Exception as e:
+        # Return a plain text 500 so your client can preview it
+        return HttpResponse(f"OpenAI TTS failed: {e}", status=500, content_type="text/plain")

-    text = _combine_text(entry)
-    if not text:
-        return HttpResponseBadRequest("No text available for this entry")
-
-    # Cache key = entry id + hash of text + model/voice
-    h = hashlib.sha256(f"{entry_id}|{MODEL_NAME}|{VOICE_NAME}|{text}".encode("utf-8")).hexdigest()[:16]
-    fname = f"entry{entry_id}_{h}.{AUDIO_FMT}"
-    fpath = os.path.join(_tts_cache_dir(), fname)
-
-    # Serve cached if present
-    if os.path.exists(fpath):
-        return FileResponse(open(fpath, "rb"), content_type="audio/mpeg")
-
-    # Generate via OpenAI and cache
-    api_key = getattr(settings, "OPENAI_API_KEY", None)
-    if not api_key:
-        return HttpResponseBadRequest("OpenAI key not configured")
-
-    client = OpenAI(api_key=api_key)
-    # New SDK returns bytes for TTS:
-    # https://platform.openai.com/docs/guides/tts
-    audio = client.audio.speech.create(
-        model=MODEL_NAME,
-        voice=VOICE_NAME,
-        input=text,
-        format=AUDIO_FMT,
-    )
-
-    # `audio` may be a dict-like with .read() or .content (SDK versions vary).
-    # Handle both safely:
-    audio_bytes = getattr(audio, "read", None)
-    audio_bytes = audio_bytes() if callable(audio_bytes) else getattr(audio, "content", audio)
-
-    with open(fpath, "wb") as f:
-        f.write(audio_bytes)
-
-    return FileResponse(open(fpath, "rb"), content_type="audio/mpeg")
+    # ---- Serve as audio/mpeg ----
+    resp = HttpResponse(audio_bytes, content_type="audio/mpeg")
+    resp["Content-Disposition"] = 'inline; filename="entry-tts.mp3"'
+    resp["Cache-Control"] = "no-store"
+    return resp