All checks were successful
Build and Push whisper-client / build-and-push (push) Successful in 58s
237 lines
8.1 KiB
Python
237 lines
8.1 KiB
Python
from flask import session, Flask, render_template, request, jsonify, redirect, url_for, session, send_from_directory, make_response
|
|
import requests
|
|
import os
|
|
import re
|
|
import markdown
|
|
import yt_dlp
|
|
from datetime import datetime
|
|
from werkzeug.utils import secure_filename
|
|
from dotenv import load_dotenv
|
|
|
|
# Flask application object and upload settings.
app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'uploads/'
# Reject request bodies larger than 32 MiB.
app.config['MAX_CONTENT_LENGTH'] = 32 * 1024 * 1024

# Secrets are read from the process environment first; the .env file is only
# loaded when at least one of them is missing.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
FLASK_SECRET_KEY = os.getenv('FLASK_SECRET_KEY')

if not (OPENAI_API_KEY and FLASK_SECRET_KEY):
    # load_dotenv is already imported at the top of the file.
    load_dotenv()
    OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
    FLASK_SECRET_KEY = os.getenv('FLASK_SECRET_KEY')

    # Fail at import time rather than on the first request that needs a key.
    if not (OPENAI_API_KEY and FLASK_SECRET_KEY):
        raise Exception("FLASK_SECRET_KEY or OPENAI_API_KEY not found in the environment or .env file")

# The secret key signs the session cookie used by the /youtube route.
app.secret_key = FLASK_SECRET_KEY

# Make sure the working directories exist before any request is served.
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
downloads_path = os.path.join(app.static_folder, 'downloads')
os.makedirs(downloads_path, exist_ok=True)
|
|
|
|
|
|
|
|
@app.route('/youtube', methods=['POST'])
def download_youtube_audio():
    """Download a video's audio track, transcribe it and store the result.

    Form fields:
        youtube_url: URL handed to ``download_audio_from_youtube``.
        summarize: Optional flag. When it is one of ``1``, ``true``, ``on``
            or ``yes`` (case-insensitive) the transcript is replaced by the
            output of ``summarize``.

    Returns:
        A redirect to ``index`` after the text has been stored in
        ``session['youtube_transcript']``, or a plain error string when the
        download failed.
    """
    youtube_url = request.form['youtube_url']

    # Form values are strings and every non-empty string is truthy, so a
    # missing flag or a literal "false" must be parsed explicitly instead of
    # being tested with a bare ``if``. "on" is what a checkbox without an
    # explicit value attribute submits when ticked.
    raw_flag = request.form.get('summarize', '')
    should_summarize = raw_flag.strip().lower() in ('1', 'true', 'on', 'yes')

    audio_file_path = download_audio_from_youtube(youtube_url)
    if not audio_file_path:
        return "Failed to download the audio from the provided YouTube URL."

    transcription = transcribe_audio(audio_file_path)
    if should_summarize:
        transcription = summarize(transcription)

    # NOTE(review): with Flask's default cookie-backed session a long
    # transcript may not fit in the cookie -- confirm the session backend.
    session['youtube_transcript'] = transcription
    return redirect(url_for('index'))
|
|
|
|
def download_audio_from_youtube(url):
    """Download the best available audio stream for *url* with yt-dlp.

    Args:
        url: URL passed to ``YoutubeDL.extract_info``.

    Returns:
        The file name yt-dlp derives from the extracted info
        (``prepare_filename``), or ``None`` when extraction or download
        raised.
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        # Store next to regular uploads; use the configured folder so the
        # two locations cannot drift apart.
        'outtmpl': os.path.join(app.config['UPLOAD_FOLDER'], '%(id)s.%(ext)s'),
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        try:
            info_dict = ydl.extract_info(url, download=True)
            return ydl.prepare_filename(info_dict)
        except Exception:
            # Best effort: the caller turns None into a user-facing message.
            # Log with traceback instead of printing to stdout.
            app.logger.exception("Error downloading audio from %s", url)
            return None
|
|
|
|
@app.route('/')
def index():
    """Render the main page.

    The template receives the ``.mp3`` entries found in ``downloads_path``,
    the ``transcription`` and ``download_url`` query parameters, and the
    transcript stored in the session by the YouTube route. Both transcripts
    are converted from Markdown to HTML before rendering.
    """
    mp3_files = [
        entry for entry in os.listdir(downloads_path) if entry.endswith('.mp3')
    ]

    # NOTE(review): 'transcription' comes straight from the query string and
    # is turned into HTML here -- confirm the template does not mark it safe
    # without sanitising.
    raw_transcription = request.args.get('transcription', '')
    raw_youtube_transcript = session.get('youtube_transcript', '')

    template_context = {
        'transcription': markdown.markdown(raw_transcription),
        'download_url': request.args.get('download_url', ''),
        'files': mp3_files,
        'youtube_transcript': markdown.markdown(raw_youtube_transcript),
    }
    return render_template('index.html', **template_context)
|
|
|
|
|
|
@app.route('/upload', methods=['POST'])
def upload_file():
    """Save an uploaded audio file, transcribe it and show the result.

    Expects a multipart form field named ``file``.

    Returns:
        A redirect to ``index``. On success the transcription is attached as
        the ``transcription`` query parameter; when no usable file was sent
        the redirect carries no parameters.
    """
    uploaded = request.files.get('file')

    # This route only accepts POST, so redirecting back to request.url would
    # issue a GET that the route rejects. Send the user to the index page.
    if uploaded is None or not uploaded.filename:
        return redirect(url_for('index'))

    filename = secure_filename(uploaded.filename)
    # secure_filename() can reduce a name to '' (for example a name made only
    # of dots and slashes); joining that would point at the upload directory
    # itself and make save() fail.
    if not filename:
        return redirect(url_for('index'))

    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    uploaded.save(file_path)

    transcription = transcribe_audio(file_path)
    # NOTE(review): the transcript travels in the redirect URL; very long
    # texts may exceed URL length limits -- consider another transport.
    return redirect(url_for('index', transcription=transcription))
|
|
|
|
|
|
def beautify_text(text, timeout=(10, 300)):
    """Ask the OpenAI chat completions API to make *text* easier to read.

    The (French) instruction asks for a more airy, readable layout without
    summarising or truncating the text.

    Args:
        text: Text appended to the instruction in the user message.
        timeout: Value forwarded to ``requests.post`` as ``timeout``
            (connect seconds, read seconds).

    Returns:
        The content of the first choice's message on HTTP 200. Otherwise a
        string starting with ``"Failed to beautify text."`` describing the
        status code or the transport error.
    """
    api_url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}",
    }
    payload = {
        "model": "gpt-4-turbo-preview",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": f"Rend ce texte plus aéré et lisible, mais ne le résume pas et ne le tronque pas: {text}",
            },
        ],
    }

    try:
        # Without a timeout a stalled connection would block the worker
        # forever.
        response = requests.post(api_url, json=payload, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Report transport failures the same way as HTTP failures: as text.
        return f"Failed to beautify text. Request error: {exc}"

    if response.status_code != 200:
        return f"Failed to beautify text. Status code: {response.status_code}"

    # Extract the beautified text from the first completion choice.
    return response.json()['choices'][0]['message']['content']
|
|
|
|
|
|
def summarize(text, timeout=(10, 300)):
    """Ask the OpenAI chat completions API for a summary of *text*.

    The (French) instruction asks for a summary and, when the text is not in
    French, a translation.

    Args:
        text: Text appended to the instruction in the user message.
        timeout: Value forwarded to ``requests.post`` as ``timeout``
            (connect seconds, read seconds).

    Returns:
        The content of the first choice's message on HTTP 200. Otherwise a
        string starting with ``"Failed to summarize text."`` describing the
        status code or the transport error.
    """
    api_url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}",
    }
    payload = {
        "model": "gpt-4-turbo-preview",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": f"Résume ce texte, et s'il n'est pas en français, traduis-le: {text}",
            },
        ],
    }

    try:
        # Without a timeout a stalled connection would block the worker
        # forever.
        response = requests.post(api_url, json=payload, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Report transport failures the same way as HTTP failures: as text.
        return f"Failed to summarize text. Request error: {exc}"

    if response.status_code != 200:
        return f"Failed to summarize text. Status code: {response.status_code}"

    # Extract the summary from the first completion choice.
    return response.json()['choices'][0]['message']['content']
|
|
|
|
def transcribe_audio(file_path, language="fr", timeout=(10, 600)):
    """Send an audio file to the OpenAI transcription endpoint.

    Args:
        file_path: Path of the audio file to upload.
        language: Language code sent as the ``language`` form field.
            Defaults to ``"fr"``.
        timeout: Value forwarded to ``requests.post`` as ``timeout``
            (connect seconds, read seconds).

    Returns:
        The ``text`` field of the JSON response on HTTP 200 (or a
        placeholder sentence when that field is absent). On any other status
        a message starting with ``"Failed to transcribe audio."``. When
        anything raises (unreadable file, network error, ...) a message
        starting with ``"An exception occurred:"``. This function never
        raises.
    """
    try:
        # The context manager closes the handle even when the upload fails.
        with open(file_path, 'rb') as audio_file:
            response = requests.post(
                'https://api.openai.com/v1/audio/transcriptions',
                headers={'Authorization': f'Bearer {OPENAI_API_KEY}'},
                files={'file': audio_file},
                data={"model": "whisper-1", "language": language},
                timeout=timeout,
            )

        if response.status_code == 200:
            return response.json().get('text', 'Transcription not found or not available.')

        error_message = f"Failed to transcribe audio. Status code: {response.status_code}."
        try:
            response_json = response.json()
            if 'error' in response_json:
                error_detail = response_json['error']['message']
                error_message += f" Error detail: {error_detail}"
        except ValueError:
            # Body was not JSON; include the start of the raw text instead.
            error_message += f" Response: {response.text[:200]}..."
        return error_message
    except Exception as e:
        # Callers display the returned string, so network issues and file
        # errors are reported as text instead of propagating.
        return f"An exception occurred: {str(e)}"
|
|
|
|
def sanitize_filename(text):
    """Return *text* reduced to a filename-friendly token.

    Every maximal run of word characters (letters, digits, underscore) is
    kept, and the runs are joined with a single underscore. Anything else --
    punctuation, whitespace -- only acts as a separator. A string without
    any word character yields the empty string.
    """
    word_runs = re.findall(r'\w+', text)
    return '_'.join(word_runs)
|
|
|
|
@app.route('/synthesize', methods=['POST'])
def synthesize():
    """Turn submitted text into speech and point the index page at the file.

    Form fields:
        text-to-synthesize: Text to convert.
        voice: Optional voice name; defaults to ``nova``.

    Returns:
        A redirect to ``index`` with the generated file's base name as the
        ``download_url`` query parameter, or an error message with status
        502 when ``synthesize_speech`` produced no file.
    """
    text = request.form['text-to-synthesize']
    selected_voice = request.form.get('voice', 'nova')  # Default to 'nova' if not specified

    file_path = synthesize_speech(text, selected_voice)
    if not file_path:
        # Falling through would return None, which Flask rejects as an
        # invalid view response; answer with an explicit error instead.
        return "Failed to synthesize speech from the provided text.", 502

    return redirect(url_for('index', download_url=os.path.basename(file_path)))
|
|
|
|
def synthesize_speech(text, voice="nova", timeout=(10, 300)):
    """Generate an MP3 for *text* with the OpenAI speech endpoint.

    The file name is built from the first five whitespace-separated words of
    the text (passed through ``sanitize_filename``) and the current local
    time.

    Args:
        text: Text to speak.
        voice: Voice name sent to the API. Defaults to ``"nova"``.
        timeout: Value forwarded to ``requests.post`` as ``timeout``
            (connect seconds, read seconds).

    Returns:
        The path of the written MP3 file, or ``None`` when the request
        failed or the API did not answer with HTTP 200.
    """
    first_words = ' '.join(text.split()[:5])
    timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
    filename = f"{sanitize_filename(first_words)}_{timestamp}.mp3"
    # Write into the same directory the index page lists, instead of a path
    # relative to the current working directory.
    file_path = os.path.join(downloads_path, filename)

    api_url = "https://api.openai.com/v1/audio/speech"
    headers = {
        "Authorization": f"Bearer {OPENAI_API_KEY}",
        "Content-Type": "application/json",
    }
    data = {
        "model": "tts-1",
        "input": text,
        "voice": voice,
        "response_format": "mp3",
        "speed": 1,
    }

    try:
        # Without a timeout a stalled connection would block the worker
        # forever.
        response = requests.post(api_url, json=data, headers=headers, timeout=timeout)
    except requests.RequestException:
        app.logger.exception("Speech synthesis request failed")
        return None

    if response.status_code != 200:
        app.logger.error("Speech synthesis failed with status %s", response.status_code)
        return None

    with open(file_path, 'wb') as audio_file:
        audio_file.write(response.content)
    return file_path
|
|
|
|
if __name__ == '__main__':
    # Development entry point. Debug mode stays on by default, as before,
    # but can now be switched off with FLASK_DEBUG=0 (or "false"/"no"/empty)
    # because the interactive debugger must never be reachable by untrusted
    # clients.
    debug_setting = os.getenv('FLASK_DEBUG', '1')
    debug_enabled = bool(debug_setting) and debug_setting.strip().lower() not in ('0', 'false', 'no')
    app.run(debug=debug_enabled)
|