nailarais1 committed on
Commit
7799791
·
verified ·
1 Parent(s): b3f208c

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -583
app.py DELETED
@@ -1,583 +0,0 @@
1
- import sympy as sp
2
- import gradio as gr
3
- import os
4
- import re
5
- import numpy as np
6
- from PIL import Image
7
- import io
8
- import tempfile
9
- from sympy import symbols, diff, integrate, limit, sin, cos, tan, log, sqrt, factorial, Matrix, oo, E, I, pi
10
-
11
# Optional dependencies. Each *_AVAILABLE flag records whether the backing
# library could be imported, so the rest of the app can degrade gracefully.
try:
    import speech_recognition as sr
except ImportError:
    SPEECH_RECOGNITION_AVAILABLE = False
    print("Speech recognition not available. Install with: pip install SpeechRecognition")
else:
    SPEECH_RECOGNITION_AVAILABLE = True

try:
    from gtts import gTTS
except ImportError:
    GTTS_AVAILABLE = False
    print("gTTS not available. Install with: pip install gTTS")
else:
    GTTS_AVAILABLE = True

# pyttsx3 counts as available only when the import AND the engine
# initialisation both succeed; `engine` stays None otherwise.
engine = None
PYTTSX3_AVAILABLE = False
try:
    import pyttsx3
except ImportError:
    print("pyttsx3 not available. Install with: pip install pyttsx3")
else:
    try:
        engine = pyttsx3.init()
        engine.setProperty('rate', 150)
        engine.setProperty('volume', 0.9)
        PYTTSX3_AVAILABLE = True
    except Exception as e:
        print(f"pyttsx3 initialization failed: {e}")
        engine = None

try:
    import pytesseract
except ImportError:
    TESSERACT_AVAILABLE = False
    print("Tesseract not available. Install with: pip install pytesseract && sudo apt install tesseract-ocr")
else:
    TESSERACT_AVAILABLE = True
    # If the tesseract binary is not on PATH, point pytesseract at it, e.g.:
    # pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'

try:
    from transformers import pipeline
except ImportError:
    TRANSFORMERS_AVAILABLE = False
    print("Transformers not available. Install with: pip install transformers")
else:
    TRANSFORMERS_AVAILABLE = True
58
-
59
class MathSolver:
    """Text-generation fallback used when symbolic evaluation fails.

    Wraps a Hugging Face ``text2text-generation`` pipeline. Loading is
    best-effort: if ``transformers`` is missing or the model cannot be
    loaded, ``ai_models_loaded`` stays False and ``solve_with_ai`` returns
    None.
    """

    # Conversational lead-ins ("The answer is", "Result:") that the model may
    # place in front of the actual answer.
    _LEAD_IN = re.compile(
        r'^\s*(?:the\s+)?(?:final\s+)?(?:answer|result|solution)\s*(?:is|=|:)\s*',
        re.IGNORECASE,
    )

    def __init__(self):
        self.ai_models_loaded = False
        # Always define the attribute so it exists even when loading is skipped.
        self.math_solver = None
        self.load_ai_models()

    def load_ai_models(self):
        """Load the generation pipeline; never raises, only updates state."""
        if not TRANSFORMERS_AVAILABLE:
            print("❌ Transformers not available for AI models")
            return

        try:
            # A small model is used on purpose to keep start-up time low.
            self.math_solver = pipeline(
                "text2text-generation",
                model="google/flan-t5-small",
                tokenizer="google/flan-t5-small"
            )
            self.ai_models_loaded = True
            print("✅ AI models loaded successfully")
        except Exception as e:
            print(f"❌ AI model loading failed: {e}")
            self.ai_models_loaded = False

    @classmethod
    def _extract_answer(cls, generated_text):
        """Strip a conversational lead-in and a trailing period from model output.

        The previous implementation returned only the first
        whitespace-delimited token, which turned "The answer is 4" into
        "The" and "x + 1" into "x". The whole answer is kept here.
        """
        stripped = generated_text.strip()
        answer = cls._LEAD_IN.sub('', stripped).strip()
        # Drop a sentence-ending period; decimals such as "3.5" are unaffected.
        if answer.endswith('.'):
            answer = answer[:-1].rstrip()
        return answer or stripped

    def solve_with_ai(self, problem):
        """Ask the model for an answer to *problem*.

        Returns the cleaned answer string, or None when no model is loaded
        or generation fails.
        """
        if not self.ai_models_loaded:
            return None

        try:
            prompt = f"Solve this math problem: {problem}. Provide the final answer."
            result = self.math_solver(
                prompt,
                max_length=100,
                num_return_sequences=1,
                temperature=0.1
            )
            return self._extract_answer(result[0]['generated_text'])
        except Exception as e:
            print(f"AI solving error: {e}")
            return None
103
-
104
# Shared solver instance; constructing it attempts the model load once, at import time.
math_solver = MathSolver()
106
-
107
def clean_text_for_tts(text, max_chars=300):
    """Convert display text into something a speech engine reads sensibly.

    Markdown bold markers and backticks are removed, arithmetic operators are
    spelled out, newlines become sentence breaks, and the result is truncated
    to *max_chars* characters.
    """
    # Unwrap Markdown bold (**word**) without touching the power operator:
    # a power operator is preceded by an operand, a bold marker is not.
    spoken = re.sub(r'(?<![\w)])\*\*(?=\S)(.+?)(?<=\S)\*\*(?![\w(])', r'\1', text)
    spoken = spoken.replace('`', '')
    # '**' must be handled before '*', otherwise it can never match.
    for symbol, words in (
        ('**', ' to the power of '),
        ('+', ' plus '),
        ('-', ' minus '),
        ('*', ' times '),
        ('/', ' divided by '),
    ):
        spoken = spoken.replace(symbol, words)
    return spoken.replace('\n', '. ')[:max_chars]


def generate_tts(text, engine_choice="auto"):
    """Synthesize *text* to a temporary audio file and return its path.

    Args:
        text: Text to speak.
        engine_choice: "auto" (pyttsx3, then gTTS), "pyttsx3", "gTTS", or
            "None" to disable audio entirely.

    Returns:
        Path of the generated file, or None when audio is disabled, the text
        is empty, or no engine could produce audio. The caller owns the
        returned file.
    """
    # Honour the "no audio" setting and skip empty text before touching disk.
    if engine_choice in (None, "None"):
        return None
    if not isinstance(text, str) or not text.strip():
        return None

    temp_path = None
    success = False
    try:
        # Reserve a path; the engines write to it by name.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            temp_path = temp_file.name

        clean_text = clean_text_for_tts(text)

        # Try pyttsx3 first if available and preferred.
        if engine_choice in ("auto", "pyttsx3") and PYTTSX3_AVAILABLE and engine:
            try:
                engine.save_to_file(clean_text, temp_path)
                engine.runAndWait()
                success = True
            except Exception as e:
                print(f"pyttsx3 failed: {e}")
                success = False

        # Fall back to gTTS if pyttsx3 failed, is missing, or gTTS was requested.
        if (
            not success
            and (engine_choice in ("auto", "gTTS") or not PYTTSX3_AVAILABLE)
            and GTTS_AVAILABLE
        ):
            try:
                tts = gTTS(text=clean_text, lang='en', slow=False)
                tts.save(temp_path)
                success = True
            except Exception as e:
                print(f"gTTS failed: {e}")
                success = False

        if not success:
            print("Neither pyttsx3 nor gTTS could generate audio.")
            return None
        return temp_path

    except Exception as e:
        print(f"TTS generation error: {e}")
        return None
    finally:
        # Remove the placeholder file only when no audio was produced; on
        # success the file is the return value and must survive.
        if not success and temp_path and os.path.exists(temp_path):
            try:
                os.unlink(temp_path)
            except OSError as e:
                print(f"Error removing temp file {temp_path}: {e}")
162
-
163
-
164
def extract_math_from_image(image_path):
    """Run OCR on an image and return ``(status_message, cleaned_text)``.

    Args:
        image_path: A file path (str), a PIL image, or a NumPy array.

    Returns:
        A 2-tuple. ``cleaned_text`` is the empty string whenever OCR is
        unavailable, the input is invalid, nothing was recognised, or an
        error occurred; ``status_message`` explains which.
    """
    if not TESSERACT_AVAILABLE:
        return "OCR not available. Please install pytesseract and tesseract-ocr.", ""

    if image_path is None:
        return "No image provided.", ""

    try:
        # pytesseract accepts PIL images directly, so in-memory inputs need
        # no temporary file, and a caller-supplied file is never deleted.
        if isinstance(image_path, np.ndarray):
            ocr_input = Image.fromarray(image_path.astype('uint8')).convert("RGB")
        elif isinstance(image_path, Image.Image):
            ocr_input = image_path.convert("RGB")
        elif isinstance(image_path, str):
            ocr_input = image_path
        else:
            return "Invalid image input type.", ""

        # --psm 6: assume a single uniform block of text; --oem 3: default engine.
        custom_config = r'--oem 3 --psm 6'
        text = pytesseract.image_to_string(ocr_input, config=custom_config)

        if text.strip():
            cleaned = clean_ocr_text(text)
            return f"📷 Extracted: {cleaned}", cleaned
        return "❌ No text found in image", ""

    except Exception as e:
        return f"❌ Image processing error: {str(e)}", ""
212
-
213
def clean_ocr_text(text):
    """Normalise raw OCR output into an expression string.

    - Maps typographic dashes and operator glyphs to ASCII operators.
    - Repairs letters commonly misread for digits (O/o -> 0, l/I -> 1), but
      only where they touch a digit and are not part of a word, so names
      such as ``cos`` or ``log`` survive.
    - Turns a lone ``=`` into ``==``; existing comparison operators
      (``==``, ``<=``, ``>=``, ``!=``) are left alone.
    - Collapses runs of whitespace.
    """
    cleaned = text
    for wrong, correct in (
        ('—', '-'), ('–', '-'), ('×', '*'), ('÷', '/'),
        ('**', '^'), ('``', '"'), ("''", '"'),
    ):
        cleaned = cleaned.replace(wrong, correct)

    # Digit look-alikes: a run of O/o/l/I directly after a digit (and not
    # followed by a letter), or directly before a digit (and not preceded by
    # a letter).
    lookalikes = str.maketrans('OolI', '0011')
    cleaned = re.sub(
        r'(?<=\d)[OolI]+(?![A-Za-z])|(?<![A-Za-z])[OolI]+(?=\d)',
        lambda m: m.group(0).translate(lookalikes),
        cleaned,
    )

    # Single '=' becomes an equality check.
    cleaned = re.sub(r'(?<![=<>!])=(?!=)', '==', cleaned)

    return re.sub(r'\s+', ' ', cleaned).strip()
228
-
229
def voice_to_text(audio_path):
    """Transcribe the audio file at *audio_path*.

    Returns the transcription on success. Every failure is reported as a
    human-readable message string rather than an exception.
    """
    if not SPEECH_RECOGNITION_AVAILABLE:
        return "Speech recognition not available. Please type your problem."
    if audio_path is None:
        return "No audio provided."

    rec = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as clip:
            recording = rec.record(clip)
        return rec.recognize_google(recording)
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError:
        return "Speech service unavailable"
    except Exception as err:
        return f"Audio error: {str(err)}"
249
-
250
def convert_speech_to_math(text):
    """Translate a spoken or typed phrase into an expression string.

    Processing order:
      1. lower-case, drop a leading filler phrase ("what is", "calculate",
         ...) and a trailing question mark;
      2. replace number words ("two") with digits, so the digit-based
         phrase patterns can match them;
      3. rewrite known phrases ("5 squared", "derivative of ...");
      4. replace remaining operator words ("plus", "over", ...);
      5. tighten spacing and insert implicit multiplication ("3x" -> "3*x").

    Returns "0" for empty input.
    """
    if not text or text.strip() == "":
        return "0"

    text = text.lower().strip()
    text = re.sub(r"^(?:what is|what's|calculate|compute|evaluate)\s+", '', text)
    text = text.rstrip('?').rstrip()
    if not text:
        return "0"

    number_words = {
        'zero': '0', 'one': '1', 'two': '2', 'three': '3', 'four': '4',
        'five': '5', 'six': '6', 'seven': '7', 'eight': '8', 'nine': '9',
        'ten': '10',
    }
    for word, digits in number_words.items():
        text = re.sub(r'\b' + word + r'\b', digits, text)

    patterns = [
        (r'add\s+(\d+)\s+and\s+(\d+)', r'\1 + \2'),
        (r'what is\s+(\d+)\s+plus\s+(\d+)', r'\1 + \2'),
        (r'subtract\s+(\d+)\s+from\s+(\d+)', r'\2 - \1'),
        (r'(\d+)\s+minus\s+(\d+)', r'\1 - \2'),
        (r'multiply\s+(\d+)\s+by\s+(\d+)', r'\1 * \2'),
        (r'(\d+)\s+times\s+(\d+)', r'\1 * \2'),
        (r'divide\s+(\d+)\s+by\s+(\d+)', r'\1 / \2'),
        (r'(\d+)\s+divided by\s+(\d+)', r'\1 / \2'),
        (r'(\d+)\s+to the power of\s+(\d+)', r'\1**\2'),
        (r'(\d+)\s+squared', r'\1**2'),
        (r'(\d+)\s+cubed', r'\1**3'),
        (r'square root of\s+(\d+)', r'sqrt(\1)'),
        (r'cube root of\s+(\d+)', r'(\1)**(1/3)'),
        # "natural log of" must run before "log of", which would otherwise
        # consume its tail and leave a stray "natural".
        (r'natural log of\s+(\d+)', r'ln(\1)'),
        (r'log of\s+(\d+)', r'log(\1)'),
        (r'sine of\s+(.+)', r'sin(\1)'),
        (r'cosine of\s+(.+)', r'cos(\1)'),
        (r'tangent of\s+(.+)', r'tan(\1)'),
        (r'derivative of\s+(.+)', r'diff(\1, x)'),
        (r'integral of\s+(.+)', r'integrate(\1, x)'),
        # The replacement must use the group reference \1; "\d" is an invalid
        # escape in a replacement template and made every call raise re.error.
        (r'factorial of\s+(\d+)', r'factorial(\1)'),
    ]
    for pattern, replacement in patterns:
        text = re.sub(pattern, replacement, text)

    operator_words = {
        'plus': '+', 'minus': '-', 'times': '*', 'multiplied by': '*',
        'divided by': '/', 'over': '/', 'e': 'E', 'equals': '==',
    }
    for word, symbol in operator_words.items():
        text = re.sub(r'\b' + word + r'\b', symbol, text)

    # Remove whitespace around operators and parentheses.
    text = re.sub(r'\s*([+\-*/^()])\s*', r'\1', text)
    # Insert the implicit multiplication sign: "3x" -> "3*x", "2(" -> "2*(".
    text = re.sub(r'(\d)([a-zA-Z(])', r'\1*\2', text)

    return text
301
-
302
def _top_level_call_args(expression, name_pattern):
    """Split ``name(arg, ...)`` into its top-level arguments.

    Returns the list of stripped argument strings when *expression* is
    exactly one call to a function matching *name_pattern*, otherwise None
    (for example when the call is only part of a larger expression).
    """
    match = re.fullmatch(r'(?:' + name_pattern + r')\((.*)\)', expression, re.DOTALL)
    if not match:
        return None

    body = match.group(1)
    args, depth, start = [], 0, 0
    for index, char in enumerate(body):
        if char in '([{':
            depth += 1
        elif char in ')]}':
            depth -= 1
            if depth < 0:
                # The outer parentheses do not belong together, e.g.
                # "diff(a, x) + diff(b, x)".
                return None
        elif char == ',' and depth == 0:
            args.append(body[start:index])
            start = index + 1
    if depth != 0:
        return None
    args.append(body[start:])
    return [arg.strip() for arg in args]


def evaluate_advanced_math(expression):
    """Evaluate *expression* with SymPy.

    Recognises a few whole-expression shortcuts, namely ``diff(f, v)``,
    ``integrate(f, v)`` / ``int(f, v)``, ``limit(f, v -> p)`` and
    ``factorial(n)``. Anything else, including these calls embedded in a
    larger expression, is handed to ``sympify``.

    Raises:
        ValueError: if the expression cannot be evaluated; the original
            exception is chained as the cause.
    """
    identifier = r'[A-Za-z_]\w*'
    try:
        # NOTE(security): sympify() parses strings with eval(); this function
        # must not be exposed to untrusted input without sandboxing.
        expr = expression.strip()

        args = _top_level_call_args(expr, r'diff')
        if args and len(args) == 2 and re.fullmatch(identifier, args[1]):
            return diff(sp.sympify(args[0]), symbols(args[1]))

        args = _top_level_call_args(expr, r'integrate|int')
        if args and len(args) == 2 and re.fullmatch(identifier, args[1]):
            return integrate(sp.sympify(args[0]), symbols(args[1]))

        args = _top_level_call_args(expr, r'limit')
        if args and len(args) == 2:
            arrow = re.fullmatch(r'(' + identifier + r')\s*->\s*(.+)', args[1])
            if arrow:
                return limit(
                    sp.sympify(args[0]),
                    symbols(arrow.group(1)),
                    sp.sympify(arrow.group(2)),
                )

        args = _top_level_call_args(expr, r'factorial')
        if args and len(args) == 1 and re.fullmatch(r'\d+', args[0]):
            return factorial(int(args[0]))

        # Default evaluation; sympify also understands diff/integrate/limit
        # calls that appear inside larger expressions.
        return sp.sympify(expr)

    except Exception as e:
        raise ValueError(f"Could not evaluate: {expression}. Error: {str(e)}") from e
341
-
342
def process_math(query, use_ai=True, auto_play=True, tts_engine_choice="auto"):
    """Solve *query* and return ``(markdown_result, audio_path_or_None)``.

    Resolution order: symbolic evaluation, then the AI model (if enabled and
    loaded), then a plain ``eval`` as a last resort. Audio is generated only
    when *auto_play* is set, a TTS engine is selected, and the problem was
    solved.
    """
    speak = auto_play and tts_engine_choice != "None"
    try:
        # Convert natural language into an expression string.
        math_expr = convert_speech_to_math(query)

        result = None
        method_used = "Symbolic Math"

        try:
            result = evaluate_advanced_math(math_expr)
        except ValueError:
            # Symbolic evaluation failed; try the AI model if enabled.
            if use_ai and math_solver.ai_models_loaded:
                ai_result = math_solver.solve_with_ai(query)
                if ai_result:
                    result = ai_result
                    method_used = "AI Model"
                else:
                    result = f"❌ Unable to solve '{query}' using AI. Trying basic evaluation."
                    method_used = "Fallback Evaluation"

        # Final fallback when neither symbolic math nor the AI produced a result.
        if result is None or "Unable to solve" in str(result):
            try:
                # NOTE(security): eval() executes arbitrary Python. math_expr
                # derives from user input, so this is unsafe on a public
                # deployment and should be replaced by a restricted evaluator.
                result = eval(math_expr)
                method_used = "Basic Evaluation (eval)"
            except Exception:
                result = f"❌ Unable to solve '{query}'. Try rephrasing or check syntax."
                method_used = "Failed"

        # Format the result as Markdown, one field per line.
        if isinstance(result, sp.Basic):
            try:
                numerical_line = f"**Numerical Result**: `{result.evalf()}`"
            except Exception as e:
                # Not every SymPy object supports numeric evaluation.
                numerical_line = f"**Numerical Result**: Could not evaluate numerically ({e})"
            lines = [
                f"**Input**: `{query}`",
                f"**Symbolic Result**: `{result}`",
                numerical_line,
                f"**Method**: {method_used}",
            ]
        else:
            # Results from the AI model or plain eval.
            lines = [
                f"**Input**: `{query}`",
                f"**Result**: `{result}`",
                f"**Method**: {method_used}",
            ]
        result_text = "\n".join(lines)

        audio_path = None
        if speak and "Unable to solve" not in result_text:
            speak_text = f"Result is {result}"
            audio_path = generate_tts(speak_text, engine_choice=tts_engine_choice)

        return result_text, audio_path

    except Exception as e:
        error_msg = f"❌ An unexpected error occurred: {str(e)}"
        audio_path = None
        if speak:
            audio_path = generate_tts(
                "Sorry, an error occurred while processing that problem.",
                engine_choice=tts_engine_choice,
            )
        return error_msg, audio_path
407
-
408
def process_all_inputs(audio=None, text_input=None, image=None, use_ai=True, auto_play=True, tts_engine_choice="auto"):
    """Pick one input source, solve it, and return ``(text, audio_path)``.

    Source priority is image, then audio, then typed text. The second
    element is a path to a generated audio file, or None when no audio is
    available.
    """
    # Messages voice_to_text() returns instead of a transcription. Matching
    # them by prefix avoids rejecting real transcriptions that merely contain
    # words such as "error".
    voice_failures = (
        "Speech recognition not available",
        "No audio provided",
        "Could not understand audio",
        "Speech service unavailable",
        "Audio error:",
    )
    speak = auto_play and tts_engine_choice != "None"

    query = ""
    output_message = ""

    if image is not None:
        extraction_result, extracted_text = extract_math_from_image(image)
        output_message = extraction_result
        if extracted_text:
            query = extracted_text
        else:
            # OCR failed or found nothing: report that and stop.
            audio_path = None
            if speak and "No text found" not in output_message:
                audio_path = generate_tts(output_message, engine_choice=tts_engine_choice)
            return output_message, audio_path

    if not query and audio is not None:
        voice_text = voice_to_text(audio)
        if voice_text.startswith(voice_failures):
            return voice_text, None

        query = voice_text
        output_message = f"🎤 Transcribed: {query}"

    if not query and text_input:
        query = text_input
        output_message = f"📝 Input: {query}"

    if not query:
        msg = "Please provide input via voice, text, or image."
        audio_path = generate_tts(msg, engine_choice=tts_engine_choice) if speak else None
        return msg, audio_path

    result_text, audio_path = process_math(query, use_ai, auto_play, tts_engine_choice)

    # Prefix the source note, except for OCR, whose note is not repeated.
    if output_message and "Extracted:" not in output_message:
        final_output_text = f"{output_message}\n\n{result_text}"
    else:
        final_output_text = result_text

    # Gradio expects None, not a dangling path, when there is no audio.
    if not (audio_path and os.path.exists(audio_path)):
        audio_path = None
    return final_output_text, audio_path
451
-
452
- # Create the interface
453
def create_interface():
    """Assemble the Gradio Blocks UI and wire its event handlers.

    Left column: the three input tabs, settings and action buttons.
    Right column: the result, audio output, system status and examples.
    Returns the ``gr.Blocks`` instance; it is not launched here.
    """
    placeholder_result = "Your solution will appear here..."

    with gr.Blocks(theme=gr.themes.Soft(), title="Math Solver Pro") as demo:
        gr.Markdown("""
        # 🧮 Math Solver Pro
        **Solve math problems using Voice, Text, or Images with Audio Responses**

        *Powered by SymPy • Hugging Face • Advanced Math Engine*
        """)

        with gr.Row():
            with gr.Column():
                gr.Markdown("### 📥 Input Methods")

                with gr.Tab("🎤 Voice"):
                    audio_input = gr.Audio(
                        sources=["microphone", "upload"],
                        type="filepath",
                        label="Speak Math Problem"
                    )

                with gr.Tab("📝 Text"):
                    text_input = gr.Textbox(
                        label="Type Math Problem",
                        placeholder="Examples: 2+2, derivative of x^2, integrate sin(x)",
                        lines=3
                    )

                with gr.Tab("📷 Image"):
                    image_input = gr.Image(
                        label="Upload Math Image",
                        type="filepath",
                        show_download_button=False
                    )

                with gr.Accordion("⚙️ Settings", open=False):
                    with gr.Row():
                        # The checkbox mirrors the real model state and is
                        # locked when no model could be loaded.
                        ai_ready = math_solver.ai_models_loaded
                        use_ai = gr.Checkbox(
                            value=ai_ready,
                            label="Use AI Models",
                            interactive=ai_ready
                        )
                        auto_play = gr.Checkbox(
                            value=True,
                            label="Auto-Play Audio"
                        )
                    with gr.Row():
                        tts_engine_choice = gr.Radio(
                            ["auto", "pyttsx3", "gTTS", "None"],
                            label="TTS Engine",
                            value="auto",
                            info="auto: prefers pyttsx3 if available, then gTTS. None: no audio."
                        )

                with gr.Row():
                    solve_btn = gr.Button("🧠 Solve", variant="primary")
                    clear_btn = gr.Button("🔄 Clear")

            with gr.Column():
                gr.Markdown("### 📊 Results")
                output_text = gr.Markdown(
                    label="Solution",
                    value="Your solution will appear here..."
                )

                audio_output = gr.Audio(
                    label="🔊 Audio Result",
                    autoplay=True,
                    visible=True,
                    value=None
                )

                with gr.Accordion("🤖 System Status", open=False):
                    status_text = f"""
                    **Available Features:**
                    - ✅ Advanced Math Engine (SymPy)
                    - {'✅' if SPEECH_RECOGNITION_AVAILABLE else '❌'} Voice Input (Requires `SpeechRecognition`)
                    - {'✅' if TESSERACT_AVAILABLE else '❌'} Image OCR (Requires `pytesseract` and `tesseract-ocr`)
                    - {'✅' if GTTS_AVAILABLE else '❌'} Online TTS (Requires `gTTS`)
                    - {'✅' if PYTTSX3_AVAILABLE else '❌'} Offline TTS (Requires `pyttsx3`)
                    - {'✅' if math_solver.ai_models_loaded else '❌'} AI Models (Requires `transformers`)
                    """
                    gr.Markdown(status_text)

                with gr.Accordion("📚 Examples", open=True):
                    gr.Markdown("""
                    **Try these examples:**
                    - **Voice**: "What is 15 times 27?"
                    - **Text**: `integrate x^2 + 3x + 1 from 0 to 1`
                    - **Image**: Upload equation photo (e.g., `sqrt(16)`)
                    - **Text**: `diff(sin(x) + cos(x), x)`
                    - **Voice**: "Calculate factorial of 7"
                    """)

        settings = [use_ai, auto_play, tts_engine_choice]
        result_components = [output_text, audio_output]

        solve_btn.click(
            fn=process_all_inputs,
            inputs=[audio_input, text_input, image_input] + settings,
            outputs=result_components
        )

        def reset_form():
            # One value per output: audio, text, image, result text, result audio.
            return None, "", None, placeholder_result, None

        clear_btn.click(
            fn=reset_form,
            inputs=[],
            outputs=[audio_input, text_input, image_input] + result_components
        )

        # Pressing Enter in the textbox solves the typed text only; audio and
        # image are passed as empty state.
        text_input.submit(
            fn=process_all_inputs,
            inputs=[gr.State(None), text_input, gr.State(None)] + settings,
            outputs=result_components
        )

    return demo
579
-
580
# Entry point when the file is run as a script (e.g. on Hugging Face Spaces).
if __name__ == "__main__":
    demo = create_interface()
    demo.launch(share=True)