init
All checks were successful
Build and Push whisper-client / build-and-push (push) Successful in 58s
All checks were successful
Build and Push whisper-client / build-and-push (push) Successful in 58s
This commit is contained in:
4
.env_template
Normal file
4
.env_template
Normal file
@@ -0,0 +1,4 @@
|
||||
OPENAI_API_KEY=
|
||||
FLASK_SECRET_KEY="RANDOMSTRING"
|
||||
WHISPER_URL=
|
||||
WHISPER_BASICAUTH=
|
||||
41
.gitea/workflows/build.yaml
Normal file
41
.gitea/workflows/build.yaml
Normal file
@@ -0,0 +1,41 @@
|
||||
name: Build and Push whisper-client
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
|
||||
jobs:
|
||||
build-and-push:
|
||||
runs-on: nuc
|
||||
steps:
|
||||
- name: Install Docker
|
||||
run: curl -fsSL https://get.docker.com | sh
|
||||
- name: Checkout Repository
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v1
|
||||
|
||||
- name: Login to Docker Registry
|
||||
uses: docker/login-action@v1
|
||||
with:
|
||||
username: ${{ secrets.REGISTRY_USERNAME }}
|
||||
password: ${{ secrets.REGISTRY_PASSWORD }}
|
||||
registry: git.xav.ovh/xav
|
||||
|
||||
- name: Build and Push Docker Image for local files
|
||||
uses: docker/build-push-action@v2
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
push: true
|
||||
tags: git.xav.ovh/xav/whisperclient:latest
|
||||
- name: Run whisper whisperclient
|
||||
env:
|
||||
WHISPER_URL: ${{ secrets.WHISPER_URL }}
|
||||
WHISPER_BASICAUTH: ${{ secrets.WHISPER_BASICAUTH }}
|
||||
FLASK_SECRET_KEY: ${{ secrets.FLASK_SECRET_KEY }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
docker compose down ; docker compose up -d
|
||||
|
||||
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
.venv
|
||||
uploads/*
|
||||
static/downloads/*
|
||||
__pycache__
|
||||
.env
|
||||
20
Dockerfile
Normal file
20
Dockerfile
Normal file
@@ -0,0 +1,20 @@
|
||||
# Use an official Python runtime as a parent image
|
||||
FROM python:3.8-slim
|
||||
|
||||
# Set the working directory in the container
|
||||
WORKDIR /app
|
||||
|
||||
# Copy the current directory contents into the container at /app
|
||||
COPY . /app
|
||||
|
||||
# Install any needed packages specified in requirements.txt
|
||||
RUN pip install --trusted-host pypi.python.org -r requirements.txt
|
||||
|
||||
# Make port 5000 available to the world outside this container
|
||||
EXPOSE 5000
|
||||
|
||||
# Define environment variable
|
||||
ENV FLASK_APP=app.py
|
||||
|
||||
# Run app.py when the container launches
|
||||
CMD ["flask", "run", "--host=0.0.0.0"]
|
||||
236
app.py
Normal file
236
app.py
Normal file
@@ -0,0 +1,236 @@
|
||||
from flask import session, Flask, render_template, request, jsonify, redirect, url_for, session, send_from_directory, make_response
|
||||
import requests
|
||||
import os
|
||||
import re
|
||||
import markdown
|
||||
import yt_dlp
|
||||
from datetime import datetime
|
||||
from werkzeug.utils import secure_filename
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Flask application object and upload configuration.
app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'uploads/'
# Cap request bodies at 32 MB (the previous comment said 16 MB, which did not
# match the configured value).
app.config['MAX_CONTENT_LENGTH'] = 32 * 1024 * 1024

# Required secrets: first try the process environment as-is.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
FLASK_SECRET_KEY = os.getenv('FLASK_SECRET_KEY')

if not (OPENAI_API_KEY and FLASK_SECRET_KEY):
    # Fall back to a .env file; load_dotenv is already imported at file level,
    # so no second import is needed here.
    load_dotenv()
    OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
    FLASK_SECRET_KEY = os.getenv('FLASK_SECRET_KEY')

    # Refuse to start without both secrets.
    if not (OPENAI_API_KEY and FLASK_SECRET_KEY):
        raise Exception("FLASK_SECRET_KEY or OPENAI_API_KEY not found in the environment or .env file")

app.secret_key = FLASK_SECRET_KEY

# Make sure the directories used for uploads and generated MP3 files exist.
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
downloads_path = os.path.join(app.static_folder, 'downloads')
os.makedirs(downloads_path, exist_ok=True)
|
||||
|
||||
|
||||
|
||||
@app.route('/youtube', methods=['POST'])
def download_youtube_audio():
    """Download a YouTube video's audio, transcribe it and optionally summarize it.

    Form fields:
        youtube_url: URL handed to ``download_audio_from_youtube``.
        summarize: optional checkbox; when set, the transcription is replaced
            by the output of ``summarize``.

    Returns:
        A redirect to the index page once the text has been stored in the
        session under ``youtube_transcript``, or a plain failure message when
        the audio could not be downloaded.
    """
    youtube_url = request.form['youtube_url']

    # An unchecked HTML checkbox is absent from the submitted form, and a
    # checked one without a value attribute is sent as "on". The previous
    # default of the string 'false' was truthy, so every request was
    # summarized no matter what the user selected.
    summarize_flag = request.form.get('summarize', '').strip().lower()
    summarize_requested = summarize_flag in ('on', 'true', '1', 'yes')

    audio_file_path = download_audio_from_youtube(youtube_url)
    if not audio_file_path:
        return "Failed to download the audio from the provided YouTube URL."

    transcription = transcribe_audio(audio_file_path)
    if summarize_requested:
        transcription = summarize(transcription)

    session['youtube_transcript'] = transcription
    return redirect(url_for('index'))
|
||||
|
||||
def download_audio_from_youtube(url):
    """Fetch the audio of ``url`` with yt-dlp and return the saved file path.

    The download uses the 'bestaudio/best' format selector and is written
    under ``uploads/`` using the video id and extension as the file name.

    Returns:
        The file name computed by ``prepare_filename`` for the downloaded
        media, or ``None`` when extraction or download raises (the error is
        printed).
    """
    options = {
        'format': 'bestaudio/best',
        'outtmpl': 'uploads/%(id)s.%(ext)s',
    }

    with yt_dlp.YoutubeDL(options) as downloader:
        try:
            metadata = downloader.extract_info(url, download=True)
            return downloader.prepare_filename(metadata)
        except Exception as e:
            print(f"Error downloading audio: {str(e)}")
    return None
|
||||
|
||||
@app.route('/')
def index():
    """Render the main page.

    The page receives the MP3 files found in ``downloads_path``, the
    ``transcription`` and ``download_url`` query parameters, and the YouTube
    transcript kept in the session. Both text values are converted from
    Markdown to HTML before rendering.
    """
    mp3_files = [name for name in os.listdir(downloads_path) if name.endswith('.mp3')]

    # NOTE(review): the template renders these two HTML strings with `|safe`,
    # and `transcription` comes straight from the query string - confirm that
    # unescaped rendering of request-supplied text is acceptable here.
    transcription_html = markdown.markdown(request.args.get('transcription', ''))
    requested_download = request.args.get('download_url', '')
    youtube_html = markdown.markdown(session.get('youtube_transcript', ''))

    return render_template(
        'index.html',
        transcription=transcription_html,
        download_url=requested_download,
        files=mp3_files,
        youtube_transcript=youtube_html,
    )
|
||||
|
||||
|
||||
@app.route('/upload', methods=['POST'])
def upload_file():
    """Save an uploaded audio file, transcribe it and show the result.

    Expects a multipart form with a ``file`` part. The file is stored in the
    configured upload folder under a sanitized name and passed to
    ``transcribe_audio``.

    Returns:
        A redirect to the index page, carrying the transcription as a query
        parameter when a file was processed. Requests without a usable file
        are redirected to the index page with no transcription.
    """
    uploaded = request.files.get('file')

    # This route only accepts POST, so redirecting back to request.url (as the
    # code did before) sent the browser to a URL that cannot be fetched with
    # GET. Send the user back to the form on the index page instead.
    if uploaded is None or not uploaded.filename:
        return redirect(url_for('index'))

    # secure_filename may return an empty string when nothing in the name is
    # safe to keep; joining that with the upload folder would yield the folder
    # itself, so treat it like a missing file.
    filename = secure_filename(uploaded.filename)
    if not filename:
        return redirect(url_for('index'))

    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    uploaded.save(file_path)

    transcription = transcribe_audio(file_path)
    # Hand the result to the index route, which renders it.
    return redirect(url_for('index', transcription=transcription))
|
||||
|
||||
|
||||
def _chat_completion(prompt, failure_prefix):
    """Send one user prompt to the OpenAI chat completions endpoint.

    Args:
        prompt: Full text of the user message.
        failure_prefix: Sentence placed before the status code in the string
            returned when the API does not answer with HTTP 200.

    Returns:
        The content of the first choice's message on success, otherwise
        ``"<failure_prefix> Status code: <code>"``.
    """
    api_url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}"
    }
    payload = {
        "model": "gpt-4-turbo-preview",
        "messages": [
            {
                "role": "system",
                "content": "You are a helpful assistant."
            },
            {
                "role": "user",
                "content": prompt
            }
        ]
    }

    response = requests.post(api_url, json=payload, headers=headers)
    if response.status_code != 200:
        return f"{failure_prefix} Status code: {response.status_code}"

    # The generated text lives in the first choice of the response.
    return response.json()['choices'][0]['message']['content']


def beautify_text(text):
    """Ask the chat model to reformat ``text`` for readability.

    The prompt asks for the text not to be summarized or truncated.

    Returns:
        The reformatted text, or a failure message containing the HTTP status
        code when the API call does not succeed.
    """
    return _chat_completion(
        f"Rend ce texte plus aéré et lisible, mais ne le résume pas et ne le tronque pas: {text}",
        "Failed to beautify text.",
    )


def summarize(text):
    """Ask the chat model to summarize ``text``.

    The prompt also asks for a translation when the text is not in French.

    Returns:
        The summary, or a failure message containing the HTTP status code when
        the API call does not succeed.
    """
    return _chat_completion(
        f"Résume ce texte, et s'il n'est pas en français, traduis-le: {text}",
        "Failed to summarize text.",
    )
|
||||
|
||||
def transcribe_audio(file_path):
    """Transcribe the audio file at ``file_path`` with the OpenAI Whisper API.

    The request uses the "whisper-1" model with the language set to "fr".

    Args:
        file_path: Path of the audio file to upload.

    Returns:
        The transcribed text on success. On any failure a human-readable error
        string is returned instead of raising: either the HTTP status (with
        the API error detail or the start of the response body), or the
        message of the exception that occurred.
    """
    headers = {
        'Authorization': f'Bearer {OPENAI_API_KEY}'
    }
    model_parameters = {"model": "whisper-1", "language": "fr"}

    try:
        # Open the file in a context manager so the handle is closed once the
        # upload finishes; it was previously left open after every call.
        with open(file_path, 'rb') as audio_file:
            response = requests.post(
                'https://api.openai.com/v1/audio/transcriptions',
                headers=headers,
                files={'file': audio_file},
                data=model_parameters
            )

        if response.status_code == 200:
            response_json = response.json()
            return response_json.get('text', 'Transcription not found or not available.')

        error_message = f"Failed to transcribe audio. Status code: {response.status_code}."
        try:
            response_json = response.json()
            if 'error' in response_json:
                error_detail = response_json['error']['message']
                error_message += f" Error detail: {error_detail}"
        except ValueError:
            # The body was not JSON; include the beginning of the raw text.
            error_message += f" Response: {response.text[:200]}..."
        return error_message
    except Exception as e:
        # Catch any other exceptions, such as network issues or file errors
        return f"An exception occurred: {str(e)}"
|
||||
|
||||
def sanitize_filename(text):
    """Turn ``text`` into a string that is safe to embed in a file name.

    Each run of non-word characters becomes a single separator, separators at
    either end are dropped, and the remaining ones are written as underscores.
    """
    spaced = re.sub(r'\W+', ' ', text)
    words = spaced.strip().split(' ')
    return '_'.join(words)
|
||||
|
||||
@app.route('/synthesize', methods=['POST'])
def synthesize():
    """Turn the submitted text into speech and point the index page at the file.

    Form fields:
        text-to-synthesize: Text to convert.
        voice: Optional voice name; defaults to 'nova'.

    Returns:
        A redirect to the index page with ``download_url`` set to the name of
        the generated file, or an error message with status 502 when
        ``synthesize_speech`` produced no file.
    """
    text = request.form['text-to-synthesize']
    selected_voice = request.form.get('voice', 'nova')  # Default to 'nova' if not specified

    file_path = synthesize_speech(text, selected_voice)
    if not file_path:
        # Previously the view fell through and returned None here, which is
        # not a valid Flask response; report the failure explicitly instead.
        return "Failed to synthesize speech from the provided text.", 502

    filename = os.path.basename(file_path)
    return redirect(url_for('index', download_url=filename))
|
||||
|
||||
def synthesize_speech(text, voice="nova"):
    """Generate an MP3 for ``text`` with the OpenAI speech API and save it.

    The file name is built from the first five words of the text (sanitized)
    followed by a timestamp.

    Args:
        text: Text to convert to speech.
        voice: Voice name sent to the API.

    Returns:
        The path of the written MP3 file, or ``None`` when the API does not
        answer with HTTP 200.
    """
    first_words = ' '.join(text.split()[:5])
    sanitized_text = sanitize_filename(first_words)
    current_datetime = datetime.now().strftime('%Y%m%d-%H%M%S')
    filename = f"{sanitized_text}_{current_datetime}.mp3"

    api_url = "https://api.openai.com/v1/audio/speech"
    headers = {
        "Authorization": f"Bearer {OPENAI_API_KEY}",
        "Content-Type": "application/json"
    }
    data = {
        "model": "tts-1",
        "input": text,
        "voice": voice,
        "response_format": "mp3",
        "speed": 1
    }

    response = requests.post(api_url, json=data, headers=headers)
    if response.status_code != 200:
        return None

    # Write into downloads_path: that is the directory created at startup and
    # listed by the index page. The previous code computed a path relative to
    # the current working directory (twice), which only matched when the app
    # was started from its own folder.
    file_path = os.path.join(downloads_path, filename)
    with open(file_path, 'wb') as audio_file:
        audio_file.write(response.content)
    return file_path
|
||||
|
||||
# Direct execution (`python app.py`) starts Flask's built-in server with debug
# mode switched on.
if __name__ == '__main__':
    app.run(debug=True)
|
||||
32
compose.yaml
Normal file
32
compose.yaml
Normal file
@@ -0,0 +1,32 @@
|
||||
networks:
|
||||
traefik:
|
||||
external: true
|
||||
name: traefik_web
|
||||
|
||||
|
||||
services:
|
||||
app:
|
||||
image: git.xav.ovh/xav/whisperclient
|
||||
restart: 'unless-stopped'
|
||||
labels:
|
||||
traefik.enable: true
|
||||
traefik.docker.network: traefik_web
|
||||
traefik.http.routers.whisper-http.rule: Host(`${WHISPER_URL}`)
|
||||
traefik.http.routers.whisper-http.entrypoints: web
|
||||
traefik.http.routers.whisper-https.rule: Host(`${WHISPER_URL}`)
|
||||
traefik.http.routers.whisper-https.tls: true
|
||||
traefik.http.routers.whisper-https.entrypoints: websecure
|
||||
traefik.http.routers.whisper-https.tls.certresolver: letsencrypt
|
||||
traefik.http.services.whisper.loadbalancer.server.port: 5000
|
||||
traefik.http.routers.whisper-https.middlewares: whisper-auth
|
||||
traefik.http.middlewares.whisper-auth.basicauth.users: ${WHISPER_BASICAUTH}
|
||||
networks:
|
||||
- traefik
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '1'
|
||||
memory: 150M
|
||||
environment:
|
||||
FLASK_SECRET_KEY: ${FLASK_SECRET_KEY}
|
||||
OPENAI_API_KEY: ${OPENAI_API_KEY}
|
||||
19
requirements.txt
Normal file
19
requirements.txt
Normal file
@@ -0,0 +1,19 @@
|
||||
blinker==1.7.0
|
||||
Brotli==1.1.0
|
||||
certifi==2024.2.2
|
||||
charset-normalizer==3.3.2
|
||||
click==8.1.7
|
||||
Flask==3.0.2
|
||||
idna==3.6
|
||||
itsdangerous==2.1.2
|
||||
Jinja2==3.1.3
|
||||
Markdown==3.5.2
|
||||
MarkupSafe==2.1.5
|
||||
mutagen==1.47.0
|
||||
pycryptodomex==3.20.0
|
||||
python-dotenv==1.0.1
|
||||
requests==2.31.0
|
||||
urllib3==2.2.1
|
||||
websockets==12.0
|
||||
Werkzeug==3.0.1
|
||||
yt-dlp==2023.12.30
|
||||
77
templates/index.html
Normal file
77
templates/index.html
Normal file
@@ -0,0 +1,77 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<title>Whisper Client</title>
|
||||
<link href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css" rel="stylesheet">
|
||||
</head>
|
||||
<body>
|
||||
<div class="container-fluid">
|
||||
<h1>Whisper Client</h1>
|
||||
<div style="margin-top: 100px">
|
||||
<h2>Transcrire un fichier audio</h2>
|
||||
<form action="/upload" method="post" enctype="multipart/form-data">
|
||||
<div class="form-group">
|
||||
<input type="file" name="file" required>
|
||||
</div>
|
||||
<button type="submit" class="btn btn-primary">Submit</button>
|
||||
</form>
|
||||
{% if transcription %}
|
||||
<div class="alert alert-success" role="alert">
|
||||
<h4 class="alert-heading">Transcription Result:</h4>
|
||||
<p>{{ transcription|safe }}</p>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
</div>
|
||||
<div style="margin-top: 100px">
|
||||
<h2>Synthétiser du texte</h2>
|
||||
<form action="/synthesize" method="post">
|
||||
|
||||
<textarea name="text-to-synthesize" class="form-control" rows="3"></textarea>
|
||||
<button type="submit" class="btn btn-primary mt-2">Synthesize</button>
|
||||
<div class="form-group">
|
||||
<label for="voice-select">Choisir une voix:</label>
|
||||
<select class="form-control" id="voice-select" name="voice">
|
||||
<option value="alloy">Alloy</option>
|
||||
<option value="echo">Echo</option>
|
||||
<option value="fable">Fable</option>
|
||||
<option value="onyx">Onyx</option>
|
||||
<option value="nova" selected>Nova</option>
|
||||
<option value="shimmer">Shimmer</option>
|
||||
</select>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
<div id="download-link-container">
|
||||
</div>
|
||||
<h2>Fichiers mp3 disponibles</h2>
|
||||
{% for file in files %}
|
||||
<div>
|
||||
<a href="{{ url_for('static', filename='downloads/' ~ file) }}" download="{{ file }}">Download MP3 - {{ file }}</a>
|
||||
<audio controls>
|
||||
<source src="{{ url_for('static', filename='downloads/' ~ file) }}" type="audio/mp3">
|
||||
Your browser does not support the audio element.
|
||||
</audio>
|
||||
</div>
|
||||
{% endfor %}
|
||||
<div style="margin-top: 100px">
|
||||
<h2>Transcrire et résumer youtube</h2>
|
||||
<form action="/youtube" method="post">
|
||||
<div class="form-group">
|
||||
<label for="youtube-url">YouTube URL:</label>
|
||||
<input type="text" class="form-control" id="youtube-url" name="youtube_url" placeholder="Enter YouTube URL here" required>
|
||||
</div>
|
||||
<label for="summarize">Résumer:</label><input type="checkbox" id="summarize" name="summarize"/>
|
||||
<button type="submit" class="btn btn-primary">Transcrire vidéo</button>
|
||||
</div>
|
||||
</form>
|
||||
{% if youtube_transcript %}
|
||||
<div class="alert alert-success" role="alert">
|
||||
<h4 class="alert-heading">Transcription Result:</h4>
|
||||
<p>{{ youtube_transcript|safe }}</p>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user