# core/views_tts.py
"""Staff-only text-to-speech endpoint for ``Entry`` objects.

Audio is generated through the OpenAI speech API and cached on disk under
``MEDIA_ROOT/tts_cache`` so that repeated requests for the same text, model
and voice are served without calling the API again.
"""
import hashlib
import os
import uuid

from django.conf import settings
from django.contrib.auth.decorators import login_required, user_passes_test
from django.http import FileResponse, HttpResponseBadRequest, HttpResponseForbidden, Http404
from django.utils.encoding import force_str

# OpenAI client
from openai import OpenAI

from .models import Entry

MODEL_NAME = "gpt-4o-mini-tts"  # good quality, lower cost
VOICE_NAME = "alloy"  # easy default
AUDIO_FMT = "mp3"


def _ensure_punctuated(s: str) -> str:
    """Return *s* stripped, with a trailing "." added if it lacks end punctuation.

    ``None`` and whitespace-only input yield the empty string.
    """
    s = (s or "").strip()
    if not s:
        return ""
    return s if s.endswith((".", "!", "?", "…", "—")) else s + "."


def _combine_text(entry: Entry) -> str:
    """Build the text to be spoken from an entry's illustration and application.

    The illustration is punctuated so it reads as a finished sentence before
    the application follows. If only one of the two is present it is returned
    alone; if neither is present the result is the empty string.
    """
    ill = _ensure_punctuated(force_str(entry.illustration or ""))
    app = force_str(entry.application or "").strip()
    if ill and app:
        # Single-space separator. This string feeds the cache key, so changing
        # it invalidates every previously cached file.
        return ill + " " + app
    return ill or app or ""


def _tts_cache_dir() -> str:
    """Return the cache directory ``MEDIA_ROOT/tts_cache``, creating it if needed.

    Raises:
        RuntimeError: if ``settings.MEDIA_ROOT`` is unset or empty.
    """
    # Put cache under media/tts_cache (self-contained folder)
    media_root = getattr(settings, "MEDIA_ROOT", None)
    if not media_root:
        raise RuntimeError("MEDIA_ROOT must be set to use TTS caching")
    path = os.path.join(media_root, "tts_cache")
    os.makedirs(path, exist_ok=True)
    return path


def _write_cache_file(path: str, data: bytes) -> None:
    """Write *data* to *path* so that *path* never holds partial content.

    The bytes go to a uniquely named temporary file in the same directory,
    which is then moved over *path* with ``os.replace``. If anything fails
    the temporary file is removed and the exception propagates, leaving the
    final cache path untouched.
    """
    tmp_path = f"{path}.{uuid.uuid4().hex}.tmp"
    try:
        # "x" mode: fail rather than clobber in the (unlikely) name collision.
        with open(tmp_path, "xb") as tmp:
            tmp.write(data)
        os.replace(tmp_path, path)
    except BaseException:
        try:
            os.unlink(tmp_path)
        except OSError:
            # Temp file was never created or is already gone; nothing to clean.
            pass
        raise


@login_required
@user_passes_test(lambda u: u.is_staff)
def api_tts_for_entry(request, entry_id: int):
    """Return MP3 speech audio for the entry with primary key *entry_id*.

    Only GET is accepted; the user must be logged in and pass the
    ``is_staff`` check. A cached file is served when one exists for the
    current text/model/voice; otherwise audio is generated via OpenAI,
    stored in the cache and then served.

    Returns:
        ``FileResponse`` with content type ``audio/mpeg`` on success, or
        ``HttpResponseBadRequest`` when the method is not GET, the entry has
        no text, or ``settings.OPENAI_API_KEY`` is not configured.

    Raises:
        Http404: if no ``Entry`` with *entry_id* exists.
        RuntimeError: if ``MEDIA_ROOT`` is not configured.
    """
    if request.method != "GET":
        return HttpResponseBadRequest("GET required")

    try:
        entry = Entry.objects.get(pk=entry_id)
    except Entry.DoesNotExist:
        raise Http404("Entry not found")

    text = _combine_text(entry)
    if not text:
        return HttpResponseBadRequest("No text available for this entry")

    # Cache key = entry id + hash of text + model/voice
    h = hashlib.sha256(
        f"{entry_id}|{MODEL_NAME}|{VOICE_NAME}|{text}".encode("utf-8")
    ).hexdigest()[:16]
    fname = f"entry{entry_id}_{h}.{AUDIO_FMT}"
    fpath = os.path.join(_tts_cache_dir(), fname)

    # Serve cached if present. Opening directly (instead of exists-then-open)
    # avoids a race with the file disappearing between the two calls.
    try:
        return FileResponse(open(fpath, "rb"), content_type="audio/mpeg")
    except FileNotFoundError:
        pass  # cache miss: fall through and generate

    # Generate via OpenAI and cache
    api_key = getattr(settings, "OPENAI_API_KEY", None)
    if not api_key:
        return HttpResponseBadRequest("OpenAI key not configured")

    client = OpenAI(api_key=api_key)

    # https://platform.openai.com/docs/guides/tts
    # The SDK keyword for the output container is `response_format`.
    audio = client.audio.speech.create(
        model=MODEL_NAME,
        voice=VOICE_NAME,
        input=text,
        response_format=AUDIO_FMT,
    )

    # `audio` may expose .read() or .content (SDK versions vary), or already
    # be the raw bytes. Handle all three:
    reader = getattr(audio, "read", None)
    audio_bytes = reader() if callable(reader) else getattr(audio, "content", audio)

    # Atomic write so a failed or interrupted write can never leave a
    # truncated file that later requests would serve as a valid cache hit.
    _write_cache_file(fpath, audio_bytes)

    return FileResponse(open(fpath, "rb"), content_type="audio/mpeg")